### Load libs

In [1]:
import pandas as pd
import json
import numpy as np
import random
from pandarallel import pandarallel
import re
import time
import multiprocessing
from multiprocessing import Pool
from joblib import Parallel, delayed
import sys

pandarallel.initialize()


INFO: Pandarallel will run on 12 workers.
INFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main process and workers.


In [2]:
n = 750
pd.set_option('display.max_columns', n)
pd.set_option('display.max_rows', n)
pd.set_option('display.max_colwidth', -1)


  after removing the cwd from sys.path.


### Load data

In [3]:
term = 'AmeriCredit Automobile Receivables Trust 2017-1 Data Tape'
finder = re.compile('\d{4,}\W\d{1,}')
add_id = re.findall(finder, term)[0]
add_id


'2017-1'

In [4]:
# load abs
folder = 'data/combined/'
file = '{}.csv'.format(term)
path = folder + file
data = pd.read_csv(path)
data.shape


  interactivity=interactivity, compiler=compiler, result=result)


(1509167, 73)

In [5]:
# load fields
f_folder = 'data/json/fields/'
f_file = 'fields.json'
f_path = f_folder + f_file
with open(f_path) as f:
    fields = json.load(f)


In [6]:
# load mapper
m_folder = 'data/dictionary/mapper/'
m_file = 'mapper.json'
m_path = m_folder + m_file
with open(m_path) as f:
    mapper = json.load(f)
    

### Setting fields

In [7]:
init_id = fields['init_id'][0]
date_cols = fields['dates']
replacer_cols = fields['replace_dash']
clean_cols = fields['clean']
m_cols = fields['map']
event_cols = fields['event']
loc_cols = fields['all_loc']
numeric_cols = fields['numeric']
all_vals_cols = fields['all_vals']
min_max_cols = fields['min_max']


### ID and dates

In [8]:
def reorder_date(init):
    
    """
    Reorder date
    """
    
    init = str(init)
    if init != '-':
        if '/' not in init:
            y = init[6:10]
            m = init[0:2]
            d = init[3:5]
            date = y + '-' + m + '-' + d
        elif '/' in init:
            y = init[3:7]
            m = init[0:2]
            date = y + '-' + m
    else:
        date = ''
    
    return date


In [9]:
data['ID'] = data[init_id].str.replace('=', '').str.replace('"', '').str.strip() + '-' + add_id


In [10]:
for col in date_cols:
    print(col)
    values = data[col].values
    dates = [reorder_date(v) for v in values]
    data['{}R'.format(col)] = dates
    

reportingPeriodEndingDate
originalFirstPaymentDate
loanMaturityDate
interestPaidThroughDate
reportingPeriodBeginningDate
DemandResolutionDate
originationDate
mostRecentServicingTransferReceivedDate
zeroBalanceEffectiveDate


In [11]:
# s_col = 'loanMaturityDate'
# t_col = '{}R'.format(s_col)
# data[[s_col, t_col]]


### Replacing values

In [12]:
data[replacer_cols] = data[replacer_cols].replace('-', np.nan)


In [13]:
# clean cols
for col in clean_cols:
    data[col] = data[col].str.strip()
    data[col] = data[col].astype(float)
    

### Replacing values

In [14]:
def replace_val(init, column):
    
    """
    Replace numeric values
    """
    
    init = str(init).strip().replace(';', '')
    if init in ['0', '1', '2', '3', '4', '5', '98', '99']:
        mapped = mapper[column][init]
        return mapped
    else:
        if init[0] in ['0', '1', '2', '3', '4', '5']:
            use = init[0]
        elif init == '-':
            use_keys = list(mapper[column].keys())
            if '98' in use_keys:
                use = '98'
            elif '99' in use_keys:
                use = '99'
        else:
            use = init
        mapped = mapper[column][use]
        
    return mapped
    

In [15]:
for col in m_cols:
    print(col)
    values = data[col].values
    ret_vals = [replace_val(v, col) for v in values]
    data['{}M'.format(col)] = ret_vals
    

zeroBalanceCode
modificationTypeCode
interestCalculationTypeCode
obligorIncomeVerificationLevelCode
vehicleValueSourceCode
servicingAdvanceMethodCode
obligorEmploymentVerificationCode
originalInterestRateTypeCode
subvented
assetSubjectDemandStatusCode
repurchaseReplacementReasonCode
vehicleTypeCode
vehicleNewUsedCode
paymentTypeCode


In [16]:
# s_col = 'subvented'
# t_col = '{}M'.format(s_col)
# data[[s_col, t_col]]

### Account status

In [17]:
def acct_status(row, b_col, e_col, zero_col, thresh):
    
    """
    Create karus account status
    """
    
    b = float(row[b_col])
    e = float(row[e_col])
    z = str(row[zero_col])
    
    if z in ['Charged-off', 'Repurchased or Replaced']:
        res = z
        return res
    if b < thresh and e < thresh:
        res = 'Prepaid or Matured'
        return res
    if z in ['Unavailable', 'Prepaid or Matured']:
        res = z
        return res
    

In [18]:
b_col = 'reportingPeriodBeginningLoanBalanceAmount'
e_col = 'nextReportingPeriodPaymentAmountDue'
z_col = 'zeroBalanceCodeM'
thresh = 50


In [19]:
data['accountStatus'] = data.parallel_apply(acct_status, args = (b_col, e_col, z_col, thresh, ), axis = 1)


In [20]:
data['accountStatus'].value_counts()


Unavailable                1456237
Charged-off                26733  
Prepaid or Matured         26159  
Repurchased or Replaced    38     
Name: accountStatus, dtype: int64

### Numeric conversion

In [21]:
# force convert cols to numeric
for col in numeric_cols:
    print(col)
    data[col] = pd.to_numeric(data[col], errors='coerce')
    

servicingFlatFeeAmount
nextInterestRatePercentage
obligorCreditScore
scheduledPrincipalAmount
otherAssessedUncollectedServicerFeeAmount
actualOtherCollectedAmount
actualPrincipalCollectedAmount
nextReportingPeriodPaymentAmountDue
currentDelinquencyStatus
originalInterestRatePercentage
reportingPeriodBeginningLoanBalanceAmount
remainingTermToMaturityNumber
originalLoanAmount
repurchaseAmount
paymentExtendedNumber
originalLoanTerm
reportingPeriodScheduledPaymentAmount
otherPrincipalAdjustmentAmount
reportingPeriodActualEndBalanceAmount
gracePeriodNumber
actualInterestCollectedAmount
paymentToIncomePercentage
servicingFeePercentage
chargedoffPrincipalAmount
reportingPeriodInterestRatePercentage
servicerAdvancedAmount
totalActualAmountPaid
repossessedProceedsAmount
scheduledInterestAmount
vehicleValueAmount
recoveredAmount


### Application

In [22]:
all_ids = list(data['ID'].unique())
#all_ids = all_ids[:1000]
print_vals = list(range(0, len(all_ids), 100))
len(all_ids)


50671

In [23]:
#_id = '0001694010 - 000010'
#all_ids = ['0001694010 - 000010']
id_col = 'ID'
status_col = 'accountStatus'
values = ['Charged-off', 'Prepaid or Matured', 'Repurchased or Replaced']


In [24]:
def convert_static(init, _id):
    
    """
    Create static df
    """
    
    df = init[init[id_col] == _id].reset_index(drop = True)
    df = df.sort_values('reportingPeriodBeginningDateR', ascending = False)

    # dict
    account_dict = {}
    account_dict['id'] = _id
    account_dict['records'] = len(df)

    # current status of loan
    for col in loc_cols:
        account_dict['{}LocCurrent'.format(col)] = df[col].iloc[0]
    for col in min_max_cols:
        account_dict['{}MaxCurrent'.format(col)] = df[col].max()
        account_dict['{}MinCurrent'.format(col)] = df[col].min()
    for col in all_vals_cols:
        vals = list(df[col].unique())
        use_vals = ' | '.join(str(val) for val in vals)
        account_dict['{}ValsCurrent'.format(col)] = use_vals
    for col in numeric_cols:
        _sum = df[col].sum()
        account_dict['{}SumCurrent'.format(col)] = _sum
        vec = list(df[col])
        vec = [v for v in vec if str(v) != 'nan']
        if len(vec) > 0:
            _len = len(vec)
            weights = sorted([1 + i for i in list(range(_len))], reverse=True)
            wa = np.average(vec, weights=weights)
            account_dict['{}WeightedCurrent'.format(col)] = wa
        else:
            account_dict['{}WeightedCurrent'.format(col)] = 0

    # event information
    init_vals = list(df[status_col].unique())
    inter = list(set(values).intersection(init_vals))
    if len(inter) > 0:
        account_dict['eventOccurred'] = 1
        n = df[status_col].where(df[status_col].isin(values)).last_valid_index()
        n_bool = True
        single = df.loc[[n]]
        for col in event_cols:
            account_dict['{}Event'.format(col)] = single[col].iloc[0]

        # prior to event
        init = n+1
        sub = df[init:len(df)]
        sub.reset_index(drop = True, inplace = True)
        account_dict['priorHistory'] = len(sub)
        sub_bool = True
        if len(sub) > 0:
            for col in loc_cols:
                account_dict['{}LocPrior'.format(col)] = sub[col].iloc[0]
            for col in min_max_cols:
                account_dict['{}MinPrior'.format(col)] = sub[col].min()
                account_dict['{}MaxPrior'.format(col)] = sub[col].max()
            for col in all_vals_cols:
                vals = list(sub[col].unique())
                use_vals = ' | '.join(str(val) for val in vals)
                account_dict['{}ValsPrior'.format(col)] = use_vals
            for col in numeric_cols:
                account_dict['{}SumPrior'.format(col)] = sub[col].sum()
                vec = list(sub[col])
                vec = [v for v in vec if str(v) != 'nan']
                if len(vec) > 0:
                    _len = len(vec)
                    weights = sorted([1 + i for i in list(range(_len))], reverse=True)
                    wa = np.average(vec, weights=weights)
                    account_dict['{}WeightedPrior'.format(col)] = wa
                else:
                    account_dict['{}WeightedPrior'.format(col)] = 0

            # random
            len_sub = len(sub)
            s = random.randint(0, len_sub)
            if s == len_sub:
                s = s -1
            r_sub = sub[s:len_sub].reset_index(drop = True)
            account_dict['randomIndex'] = s
            for col in loc_cols:
                account_dict['{}LocRandom'.format(col)] = r_sub[col].iloc[0]
            for col in min_max_cols:
                account_dict['{}MinRandom'.format(col)] = r_sub[col].min()
                account_dict['{}MaxRandom'.format(col)] = r_sub[col].max()
            for col in all_vals_cols:
                vals = list(r_sub[col].unique())
                use_vals = ' | '.join(str(val) for val in vals)
                account_dict['{}ValsRandom'.format(col)] = use_vals
            for col in numeric_cols:
                account_dict['{}SumRandom'.format(col)] = r_sub[col].sum()
                vec = list(r_sub[col])
                vec = [v for v in vec if str(v) != 'nan']
                if len(vec) > 0:
                    _len = len(vec)
                    weights = sorted([1 + i for i in list(range(_len))], reverse=True)
                    wa = np.average(vec, weights=weights)
                    account_dict['{}WeightedRandom'.format(col)] = wa
                else:
                    account_dict['{}WeightedRandom'.format(col)] = 0

        # if event is first row of sub       
        else:
            for col in loc_cols:
                account_dict['{}LocPrior'.format(col)] = df[col].iloc[0]
            for col in min_max_cols:
                account_dict['{}MinPrior'.format(col)] = df[col].min()
                account_dict['{}MaxPrior'.format(col)] = df[col].max()
            for col in all_vals_cols:
                vals = list(df[col].unique())
                use_vals = ' | '.join(str(val) for val in vals)
                account_dict['{}ValsPrior'.format(col)] = use_vals
            for col in numeric_cols:
                account_dict['{}SumPrior'.format(col)] = df[col].sum()
                account_dict['{}WeightedPrior'.format(col)] = df[col].iloc[0]

    # if no event        
    else:
        account_dict['eventOccurred'] = 0
        account_dict['priorHistory'] = len(df)
        sub_bool = False
        n_bool = False
        for col in event_cols:
            account_dict['{}Event'.format(col)] = np.nan
        for col in loc_cols:
            account_dict['{}LocPrior'.format(col)] = np.nan
        for col in min_max_cols:
            account_dict['{}MinPrior'.format(col)] = np.nan
            account_dict['{}MaxPrior'.format(col)] = np.nan
        for col in all_vals_cols:
            account_dict['{}ValsPrior'.format(col)] = np.nan
        for col in numeric_cols:
            account_dict['{}SumPrior'.format(col)] = np.nan
            account_dict['{}WeightedPrior'.format(col)] = np.nan

        # random set to nan
        account_dict['randomIndex'] = np.nan
        for col in loc_cols:
            account_dict['{}LocRandom'.format(col)] = np.nan
        for col in min_max_cols:
            account_dict['{}MinRandom'.format(col)] = np.nan
            account_dict['{}MaxRandom'.format(col)] = np.nan
        for col in all_vals_cols:
            account_dict['{}ValsRandom'.format(col)] = np.nan
        for col in numeric_cols:
            account_dict['{}SumRandom'.format(col)] = np.nan
            account_dict['{}WeightedRandom'.format(col)] = np.nan

    return account_dict


In [25]:
def convert_static_2(df):
    
    """
    Create static df
    """
    
    #df = init[init[id_col] == _id].reset_index(drop = True)
    df = df.sort_values('reportingPeriodBeginningDateR', ascending = False)
    _id = df['ID'].iloc[0]

    # dict
    account_dict = {}
    account_dict['id'] = _id
    account_dict['records'] = len(df)

    # current status of loan
    for col in loc_cols:
        account_dict['{}LocCurrent'.format(col)] = df[col].iloc[0]
    for col in min_max_cols:
        account_dict['{}MaxCurrent'.format(col)] = df[col].max()
        account_dict['{}MinCurrent'.format(col)] = df[col].min()
    for col in all_vals_cols:
        vals = list(df[col].unique())
        use_vals = ' | '.join(str(val) for val in vals)
        account_dict['{}ValsCurrent'.format(col)] = use_vals
    for col in numeric_cols:
        _sum = df[col].sum()
        account_dict['{}SumCurrent'.format(col)] = _sum
        vec = list(df[col])
        vec = [v for v in vec if str(v) != 'nan']
        if len(vec) > 0:
            _len = len(vec)
            weights = sorted([1 + i for i in list(range(_len))], reverse=True)
            wa = np.average(vec, weights=weights)
            account_dict['{}WeightedCurrent'.format(col)] = wa
        else:
            account_dict['{}WeightedCurrent'.format(col)] = 0

    # event information
    init_vals = list(df[status_col].unique())
    inter = list(set(values).intersection(init_vals))
    if len(inter) > 0:
        account_dict['eventOccurred'] = 1
        n = df[status_col].where(df[status_col].isin(values)).last_valid_index()
        n_bool = True
        single = df.loc[[n]]
        for col in event_cols:
            account_dict['{}Event'.format(col)] = single[col].iloc[0]

        # prior to event
        init = n+1
        sub = df[init:len(df)]
        sub.reset_index(drop = True, inplace = True)
        account_dict['priorHistory'] = len(sub)
        sub_bool = True
        if len(sub) > 0:
            for col in loc_cols:
                account_dict['{}LocPrior'.format(col)] = sub[col].iloc[0]
            for col in min_max_cols:
                account_dict['{}MinPrior'.format(col)] = sub[col].min()
                account_dict['{}MaxPrior'.format(col)] = sub[col].max()
            for col in all_vals_cols:
                vals = list(sub[col].unique())
                use_vals = ' | '.join(str(val) for val in vals)
                account_dict['{}ValsPrior'.format(col)] = use_vals
            for col in numeric_cols:
                account_dict['{}SumPrior'.format(col)] = sub[col].sum()
                vec = list(sub[col])
                vec = [v for v in vec if str(v) != 'nan']
                if len(vec) > 0:
                    _len = len(vec)
                    weights = sorted([1 + i for i in list(range(_len))], reverse=True)
                    wa = np.average(vec, weights=weights)
                    account_dict['{}WeightedPrior'.format(col)] = wa
                else:
                    account_dict['{}WeightedPrior'.format(col)] = 0

            # random
            len_sub = len(sub)
            s = random.randint(0, len_sub)
            if s == len_sub:
                s = s -1
            r_sub = sub[s:len_sub].reset_index(drop = True)
            account_dict['randomIndex'] = s
            for col in loc_cols:
                account_dict['{}LocRandom'.format(col)] = r_sub[col].iloc[0]
            for col in min_max_cols:
                account_dict['{}MinRandom'.format(col)] = r_sub[col].min()
                account_dict['{}MaxRandom'.format(col)] = r_sub[col].max()
            for col in all_vals_cols:
                vals = list(r_sub[col].unique())
                use_vals = ' | '.join(str(val) for val in vals)
                account_dict['{}ValsRandom'.format(col)] = use_vals
            for col in numeric_cols:
                account_dict['{}SumRandom'.format(col)] = r_sub[col].sum()
                vec = list(r_sub[col])
                vec = [v for v in vec if str(v) != 'nan']
                if len(vec) > 0:
                    _len = len(vec)
                    weights = sorted([1 + i for i in list(range(_len))], reverse=True)
                    wa = np.average(vec, weights=weights)
                    account_dict['{}WeightedRandom'.format(col)] = wa
                else:
                    account_dict['{}WeightedRandom'.format(col)] = 0

        # if event is first row of sub       
        else:
            for col in loc_cols:
                account_dict['{}LocPrior'.format(col)] = df[col].iloc[0]
            for col in min_max_cols:
                account_dict['{}MinPrior'.format(col)] = df[col].min()
                account_dict['{}MaxPrior'.format(col)] = df[col].max()
            for col in all_vals_cols:
                vals = list(df[col].unique())
                use_vals = ' | '.join(str(val) for val in vals)
                account_dict['{}ValsPrior'.format(col)] = use_vals
            for col in numeric_cols:
                account_dict['{}SumPrior'.format(col)] = df[col].sum()
                account_dict['{}WeightedPrior'.format(col)] = df[col].iloc[0]

    # if no event        
    else:
        account_dict['eventOccurred'] = 0
        account_dict['priorHistory'] = len(df)
        sub_bool = False
        n_bool = False
        for col in event_cols:
            account_dict['{}Event'.format(col)] = np.nan
        for col in loc_cols:
            account_dict['{}LocPrior'.format(col)] = np.nan
        for col in min_max_cols:
            account_dict['{}MinPrior'.format(col)] = np.nan
            account_dict['{}MaxPrior'.format(col)] = np.nan
        for col in all_vals_cols:
            account_dict['{}ValsPrior'.format(col)] = np.nan
        for col in numeric_cols:
            account_dict['{}SumPrior'.format(col)] = np.nan
            account_dict['{}WeightedPrior'.format(col)] = np.nan

        # random set to nan
        account_dict['randomIndex'] = np.nan
        for col in loc_cols:
            account_dict['{}LocRandom'.format(col)] = np.nan
        for col in min_max_cols:
            account_dict['{}MinRandom'.format(col)] = np.nan
            account_dict['{}MaxRandom'.format(col)] = np.nan
        for col in all_vals_cols:
            account_dict['{}ValsRandom'.format(col)] = np.nan
        for col in numeric_cols:
            account_dict['{}SumRandom'.format(col)] = np.nan
            account_dict['{}WeightedRandom'.format(col)] = np.nan

    return account_dict


In [26]:
all_ids = all_ids[0:1000]
sub_test = data[data['ID'].isin(all_ids)].reset_index(drop = True)
len(sub_test)


46564

In [27]:
splits = list(sub_test.groupby('ID')) 
nl = [splits[n][1] for n in list(range(len(splits)))]
nl_2 = np.array(nl)


  This is separate from the ipykernel package so we can avoid doing imports until


In [53]:
s = time.time()
test = [convert_static(data, _id) for _id in all_ids]
e = time.time()
e - s

9.735635995864868

In [28]:
s = time.time()
test = [convert_static_2(d) for d in nl_2]
e = time.time()
e - s

12.657718181610107

In [29]:
t = pd.DataFrame(test)

In [34]:
s = time.time()
for _id in all_ids:
    df = data[data[id_col] == _id].reset_index(drop = True)
    #df = df.sort_values('reportingPeriodBeginningDateR', ascending = False)
e = time.time()
e - s


8.976347208023071

In [54]:
splits = list(sub_test.groupby("ID")) 
nl = [splits[n][1] for n in list(range(len(splits)))]

In [25]:
# all_ids = all_ids[0:300]
# num_cores = multiprocessing.cpu_count()
# s = time.time()
# holder = Parallel(n_jobs=6, backend='threading')(delayed(convert_static)(i) for i in all_ids)
# e = time.time()
# e-s

In [25]:
holder = []
broke = []
counter = 0
status = 'good'
start = time.time()
for _id in all_ids:
    counter = counter + 1
    if counter in print_vals:
        print(counter, counter/len(all_ids))
        end = time.time()
        print (end - start)
        print('------------------')
    try:
        account_dict = convert_static(_id)
        holder.append(account_dict)
    except:
        print('error {}'.format(_id))
        broke.append(_id)
        if len(broke) > 10:
            status = 'bad'
            sys.exit('too many errors')
final = time.time()
time_elapsed = final - start


100 0.001973515423023031
12.863536834716797
------------------
200 0.003947030846046062
23.931628942489624
------------------
300 0.005920546269069093
35.49951982498169
------------------
400 0.007894061692092124
46.13793683052063
------------------
500 0.009867577115115154
56.55598998069763
------------------
600 0.011841092538138186
67.82160997390747
------------------
700 0.013814607961161216
78.3015649318695
------------------
800 0.015788123384184248
89.15629172325134
------------------
900 0.01776163880720728
99.18488073348999
------------------
1000 0.01973515423023031
109.53192281723022
------------------
1100 0.02170866965325334
119.79348182678223
------------------
1200 0.023682185076276372
129.41310381889343
------------------
1300 0.0256557004992994
139.6499650478363
------------------
1400 0.027629215922322432
150.085697889328
------------------
1500 0.029602731345345464
159.94154906272888
------------------
1600 0.031576246768368496
171.4302897453308
------------------
17

13300 0.2624775512620631
1550.8879129886627
------------------
13400 0.26445106668508617
1560.891931772232
------------------
13500 0.26642458210810915
1570.814357995987
------------------
13600 0.2683980975311322
1581.2113647460938
------------------
13700 0.27037161295415524
1591.6102268695831
------------------
13800 0.2723451283771783
1601.949858903885
------------------
13900 0.2743186438002013
1612.2668197154999
------------------
14000 0.2762921592232243
1622.948677778244
------------------
14100 0.27826567464624735
1633.2685370445251
------------------
14200 0.2802391900692704
1643.6888048648834
------------------
14300 0.28221270549229344
1654.9068748950958
------------------
14400 0.2841862209153165
1665.102430820465
------------------
14500 0.28615973633833947
1675.6953699588776
------------------
14600 0.2881332517613625
1685.8621609210968
------------------
14700 0.29010676718438555
1696.1426858901978
------------------
14800 0.2920802826074086
1706.5885980129242
---------

26400 0.5210080716780802
3215.481160879135
------------------
26500 0.5229815871011032
3227.9630048274994
------------------
26600 0.5249551025241262
3240.3380677700043
------------------
26700 0.5269286179471493
3252.5736377239227
------------------
26800 0.5289021333701723
3264.8296880722046
------------------
26900 0.5308756487931953
3277.0259788036346
------------------
27000 0.5328491642162183
3289.1155037879944
------------------
27100 0.5348226796392413
3301.3805768489838
------------------
27200 0.5367961950622644
3313.620803833008
------------------
27300 0.5387697104852874
3325.9206438064575
------------------
27400 0.5407432259083105
3338.925208091736
------------------
27500 0.5427167413313335
3351.867901802063
------------------
error 0001694010 - 011453-2017-1
error 0001694010 - 012247-2017-1
error 0001694010 - 012461-2017-1
error 0001694010 - 012486-2017-1
error 0001694010 - 012606-2017-1
error 0001694010 - 012617-2017-1
error 0001694010 - 012951-2017-1
error 0001694010 

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



error 0001694010 - 013062-2017-1
error 0001694010 - 013216-2017-1
Traceback (most recent call last):
  File "<ipython-input-26-64e76e8a438f>", line 14, in <module>
    account_dict = convert_static(_id)
  File "<ipython-input-24-600a393d12d7>", line 7, in convert_static
    df = data[data[id_col] == _id].reset_index(drop = True)
  File "/anaconda3/envs/kn/lib/python3.7/site-packages/pandas/core/ops/common.py", line 64, in new_method
    return method(self, other)
  File "/anaconda3/envs/kn/lib/python3.7/site-packages/pandas/core/ops/__init__.py", line 529, in wrapper
    res_values = comparison_op(lvalues, rvalues, op)
  File "/anaconda3/envs/kn/lib/python3.7/site-packages/pandas/core/ops/array_ops.py", line 247, in comparison_op
    res_values = comp_method_OBJECT_ARRAY(op, lvalues, rvalues)
  File "/anaconda3/envs/kn/lib/python3.7/site-packages/pandas/core/ops/array_ops.py", line 57, in comp_method_OBJECT_ARRAY
    result = libops.scalar_compare(x.ravel(), y, op)
KeyboardInterrupt

D

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "<ipython-input-26-64e76e8a438f>", line 14, in <module>
    account_dict = convert_static(_id)
  File "<ipython-input-24-600a393d12d7>", line 7, in convert_static
    df = data[data[id_col] == _id].reset_index(drop = True)
  File "/anaconda3/envs/kn/lib/python3.7/site-packages/pandas/core/ops/common.py", line 64, in new_method
    return method(self, other)
  File "/anaconda3/envs/kn/lib/python3.7/site-packages/pandas/core/ops/__init__.py", line 529, in wrapper
    res_values = comparison_op(lvalues, rvalues, op)
  File "/anaconda3/envs/kn/lib/python3.7/site-packages/pandas/core/ops/array_ops.py", line 247, in comparison_op
    res_values = comp_method_OBJECT_ARRAY(op, lvalues, rvalues)
  File "/anaconda3/envs/kn/lib/python3.7/site-packages/pandas/core/ops/array_ops.py", line 57, in comp_method_OBJECT_ARRAY
    result = libops.scalar_compare(x.ravel(), y, op)
KeyboardInterrupt

During handling of the above exception, another exception occurred:

TypeError: object of type 'NoneType' has no len()

In [29]:
log = {}
log['errors'] = len(broke)
log['status'] = status
log['term'] = time_elapsed


NameError: name 'time_elapsed' is not defined

In [30]:
log

{'errors': 11, 'status': 'good'}

In [None]:
master = pd.DataFrame(holder)


In [None]:
master['accountStatusEvent'].value_counts(dropna = False)


In [None]:
master['securitization'] = term


In [None]:
master.shape


### Export

In [None]:
e_folder = 'data/static/'
e_file = '{} static.csv'.format(term)
e_path = e_folder + e_file
master.to_csv(e_path, index = False)


In [None]:
# export log
j_folder = 'data/static/prepared/log/'
j_file = '{} log.json'.format(term)
j_path = j_folder + j_file
with open(j_path, 'w') as outfile:  
    json.dump(log, outfile, indent = 4, separators = (',', ': '), sort_keys = False)


In [None]:
print('continue...')


### End