### Load libs

In [17]:
import pandas as pd
import json
import numpy as np
import random
from pandarallel import pandarallel
import re

pandarallel.initialize()


INFO: Pandarallel will run on 12 workers.
INFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main process and workers.


In [18]:
# Display settings: show up to `n` rows/columns and never truncate cell text.
n = 500
pd.set_option('display.max_columns', n)
pd.set_option('display.max_rows', n)
# None means "no limit"; the former -1 sentinel is deprecated and rejected by
# current pandas releases.
pd.set_option('display.max_colwidth', None)

  after removing the cwd from sys.path.


### Load data

In [19]:
# Deal name: used below to build the input/output CSV file names and to
# extract the securitization id with a regex.
term = 'AmeriCredit Automobile Receivables Trust 2017-1 Data Tape'


In [20]:
# Read the combined tape for `term` into `data` and show its dimensions.
folder = 'data/combined/'
file = '{}.csv'.format(term)
path = ''.join((folder, file))
data = pd.read_csv(path)
data.shape


(1509167, 73)

In [21]:
# Read the JSON file that lists which columns belong to each processing step.
f_folder, f_file = 'data/json/fields/', 'fields.json'
f_path = f_folder + f_file
with open(f_path) as f:
    fields = json.loads(f.read())
    

In [22]:
# Read the JSON dictionary that `replace_val` uses to translate coded values.
m_folder, m_file = 'data/dictionary/mapper/', 'mapper.json'
m_path = m_folder + m_file
with open(m_path) as f:
    mapper = json.loads(f.read())
    

In [24]:
# find securitization
# Raw string: '\d' and '\W' in a plain literal are invalid escape sequences
# (a warning today, an error in future Python versions).
finder = re.compile(r'\d{4,}\W\d{1,}')
found = finder.search(term)
if found is None:
    # Fail with a clear message instead of an IndexError on an empty match list.
    raise ValueError('no securitization id found in term: {!r}'.format(term))
add_id = found.group(0)
add_id

'2017-1'

### Formatting

In [25]:
# Unpack the column lists from `fields`; `init_id` is the first entry of its list.
init_id = fields['init_id'][0]
(
    date_cols, replacer_cols, clean_cols, m_cols, event_cols,
    loc_cols, numeric_cols, all_vals_cols, min_max_cols,
) = (
    fields[key]
    for key in (
        'dates', 'replace_dash', 'clean', 'map', 'event',
        'all_loc', 'numeric', 'all_vals', 'min_max',
    )
)

In [26]:
def convert_id(row, column):

    """
    Return row[column] as text with every '=' and '"' removed and the
    surrounding whitespace trimmed.
    """

    text = str(row[column])
    for unwanted in ('=', '"'):
        text = text.replace(unwanted, '')

    return text.strip()
    

In [27]:
def reorder_date(row, column):
    
    """
    Reorder date

    Rewrites the text in row[column] so that the year comes first:

    * a value containing '/' is read as 'MM/YYYY' and returned as 'YYYY-MM';
    * any other value is read as a 10-character 'MM?DD?YYYY' string (the
      separators are not inspected) and returned as 'YYYY-MM-DD';
    * the '-' placeholder, an empty string and missing values (None / NaN)
      return ''.

    The value is trimmed before slicing so padding does not shift the
    positions of month, day and year.
    """
    
    value = row[column]
    # Missing cells would otherwise be sliced as the text 'nan' / 'None'.
    if pd.isna(value):
        return ''
    
    init = str(value).strip()
    if init in ('', '-'):
        return ''
    
    m = init[0:2]
    if '/' in init:
        return init[3:7] + '-' + m
    
    return init[6:10] + '-' + m + '-' + init[3:5]
        

In [28]:
# 'ID' is the cleaned asset number; 'id' appends the securitization id to it.
cleaned_ids = data.parallel_apply(convert_id, axis = 1, args = ('assetNumber', ))
data['ID'] = cleaned_ids
data['id'] = data['ID'] + ' {}'.format(add_id)


In [30]:
# Add a '<col>R' column holding the year-first version of every date column.
for col in date_cols:
    print(col)
    reordered = data.parallel_apply(reorder_date, axis = 1, args = (col, ))
    data['{}R'.format(col)] = reordered
    

reportingPeriodEndingDate
originalFirstPaymentDate
loanMaturityDate
interestPaidThroughDate
reportingPeriodBeginningDate
DemandResolutionDate
originationDate
mostRecentServicingTransferReceivedDate
zeroBalanceEffectiveDate


In [31]:
# In the 'replace_dash' columns, turn cells that are exactly '-' into NaN.
data[replacer_cols] = data[replacer_cols].replace('-', np.nan)

In [32]:
# clean cols
# Trim padding and cast the 'clean' columns to float.
for col in clean_cols:
    cleaned = data[col]
    # The .str accessor raises on numeric dtypes, which is what this column has
    # once the cell has already run (or if read_csv parsed it as numeric), so
    # only text columns are stripped.
    if not pd.api.types.is_numeric_dtype(cleaned):
        cleaned = cleaned.str.strip()
    data[col] = cleaned.astype(float)
    

In [33]:
def replace_val(row, column):
    
    """
    Replace numeric values

    Translates the code in row[column] through the module-level
    mapper[column] dictionary and returns the mapped value.

    The cell is converted to text, trimmed, and ';' characters are removed.
    Lookup order:

    1. one of '0'-'5', '98', '99': looked up as is;
    2. '-' or an empty string: mapped through '98' if that key exists,
       otherwise '99'; if neither exists the value itself is looked up;
    3. text starting with a digit '0'-'5': only that first digit is looked up;
    4. anything else: looked up as is.

    Raises KeyError when the chosen key is not in mapper[column].
    """
    
    init = str(row[column]).strip().replace(';', '')
    codes = mapper[column]
    
    if init in ['0', '1', '2', '3', '4', '5', '98', '99']:
        return codes[init]
    
    if init in ('', '-'):
        # Placeholder / blank: fall back to whichever code this column defines.
        for fallback in ('98', '99'):
            if fallback in codes:
                return codes[fallback]
        # No fallback code exists: do a plain lookup so the failure is a
        # KeyError naming the value, not an unbound local variable.
        return codes[init]
    
    if init[0] in ['0', '1', '2', '3', '4', '5']:
        return codes[init[0]]
    
    return codes[init]
    

In [34]:
# Add a '<col>M' column with the mapped value of every coded column.
for col in m_cols:
    print(col)
    new_col = col + 'M'
    mapped_values = data.parallel_apply(replace_val, axis = 1, args = (col, ))
    data[new_col] = mapped_values
    

zeroBalanceCode
modificationTypeCode
interestCalculationTypeCode
obligorIncomeVerificationLevelCode
vehicleValueSourceCode
servicingAdvanceMethodCode
obligorEmploymentVerificationCode
originalInterestRateTypeCode
subvented
assetSubjectDemandStatusCode
repurchaseReplacementReasonCode
vehicleTypeCode
vehicleNewUsedCode
paymentTypeCode


In [35]:
def acct_status(row, b_col, e_col, zero_col, thresh):

    """
    Derive the account status for one row.

    Reads row[b_col] and row[e_col] as floats and row[zero_col] as text, then:

    * 'Charged-off' / 'Repurchased or Replaced' are returned unchanged;
    * otherwise, when both amounts are below `thresh`, the result is
      'Prepaid or Matured';
    * otherwise 'Unavailable' / 'Prepaid or Matured' are returned unchanged;
    * any other status gives None.
    """

    begin_amount = float(row[b_col])
    end_amount = float(row[e_col])
    zero_status = str(row[zero_col])

    if zero_status in ('Charged-off', 'Repurchased or Replaced'):
        return zero_status

    if begin_amount < thresh and end_amount < thresh:
        return 'Prepaid or Matured'

    if zero_status in ('Unavailable', 'Prepaid or Matured'):
        return zero_status

    return None
    

In [36]:
# Arguments passed to acct_status: the two amount columns compared against
# `thresh`, and the mapped zero-balance code column.
b_col = 'reportingPeriodBeginningLoanBalanceAmount'
# NOTE(review): e_col is a payment-amount-due column, not a balance — confirm
# this is the intended second amount for the "below threshold" test.
e_col = 'nextReportingPeriodPaymentAmountDue'
z_col = 'zeroBalanceCodeM'
thresh = 50


In [None]:
# Compute the account status row by row (in parallel) from the columns named above.
data['accountStatus'] = data.parallel_apply(acct_status, args = (b_col, e_col, z_col, thresh, ), axis = 1)

In [None]:
# Row count per account status (rows where acct_status returned None are not counted).
data['accountStatus'].value_counts()

### Application

In [20]:
# Distinct loan ids in order of first appearance, plus the loop counts
# (every 100th, starting at 0) at which progress is printed.
all_ids = data['ID'].unique().tolist()
print_vals = [mark for mark in range(0, len(all_ids), 100)]


In [21]:
# Left-over manual overrides (presumably for trying the loop on a few ids):
#_id = '0001694010 - 000010'
#all_ids = ['0001694010 - 000010', '0001694010 - 000088', '0001694010 - 009321']
# Settings for the per-loan loop: amount columns totalled per loan, the id and
# status column names, and the statuses the loop searches for.
sum_cols = ['chargedoffPrincipalAmount', 'recoveredAmount', 'repossessedProceedsAmount']
id_col = 'ID'
status_col = 'accountStatus'
values = ['Charged-off', 'Prepaid or Matured', 'Repurchased or Replaced']


In [22]:
# Build one frame per loan: rows ordered newest reporting period first, the
# per-loan totals of `sum_cols` added, and - when the loan ever reaches one of
# the statuses in `values` - only the rows from the last such row (in that
# newest-first order) to the end kept.
holder = []
counter = 0

# Group the row positions once. Filtering `data` with a boolean mask for every
# id rescans the whole frame per loan, which is quadratic in the number of rows.
rows_by_id = data.groupby(id_col, sort = False).indices
# Set membership is constant time; a list would be scanned on every iteration.
progress_marks = set(print_vals)

for _id in all_ids:
    counter = counter + 1
    if counter in progress_marks:
        print(counter, counter/len(all_ids))
        print('------------------------------')
    
    df = data.iloc[rows_by_id.get(_id, [])]
    # Renumber AFTER sorting so that an index label equals the row position;
    # otherwise the cut below would use a pre-sort label as a position.
    df = df.sort_values('reportingPeriodBeginningDateR', ascending = False).reset_index(drop = True)
    for col in sum_cols:
        df['{}Sum'.format(col)] = df[col].sum()
    
    in_values = df[status_col].isin(values)
    
    if in_values.any():
        # Position of the last row (newest-first order) whose status is in `values`.
        cut = in_values[in_values].index[-1]
        sub = df.iloc[cut:].reset_index(drop = True)
        holder.append(sub)
    else:
        holder.append(df)


100 6.626171921331436e-05
------------------------------
200 0.00013252343842662872
------------------------------
300 0.0001987851576399431
------------------------------
400 0.00026504687685325744
------------------------------
500 0.0003313085960665718
------------------------------
600 0.0003975703152798862
------------------------------
700 0.0004638320344932006
------------------------------
800 0.0005300937537065149
------------------------------
900 0.0005963554729198293
------------------------------
1000 0.0006626171921331436
------------------------------
1100 0.000728878911346458
------------------------------
1200 0.0007951406305597724
------------------------------
1300 0.0008614023497730867
------------------------------
1400 0.0009276640689864011
------------------------------
1500 0.0009939257881997155
------------------------------
1600 0.0010601875074130298
------------------------------
1700 0.0011264492266263443
------------------------------
1800 0.001192710945839

KeyboardInterrupt: 

In [None]:
# Stack the per-loan frames collected in `holder` into a single frame.
master = pd.concat(holder)


In [None]:
# Every per-loan frame was numbered from 0, so give the stacked frame one continuous index.
master = master.reset_index(drop = True)

### Add target

In [None]:
# Take the remaining term from the next row of the SAME loan. Rows are ordered
# newest reporting period first within a loan, so the next row is the previous
# period. A plain shift over the stacked frame would hand each loan's last row
# the value of the following loan; grouping by 'ID' leaves it as NaN instead.
master['previousRemainingTerm'] = (
    master.groupby('ID', sort = False)['remainingTermToMaturityNumber'].shift(-1)
)

In [None]:
def get_target(row):

    """
    Derive the target label for one row.

    Uses row['accountStatus'] (as text) and row['previousRemainingTerm']:

    * 'Charged-off'                              -> 'Charged-off'
    * 'Prepaid or Matured', remaining term > 0   -> 'Prepaid'
    * 'Prepaid or Matured', remaining term < 1   -> 'Closed'
    * 'Unavailable'                              -> 'Active or other'
    * anything else (including a 'Prepaid or Matured' row whose remaining
      term is NaN)                               -> None
    """

    status = str(row['accountStatus'])
    term_left = row['previousRemainingTerm']

    if status == 'Charged-off':
        return 'Charged-off'

    if status == 'Prepaid or Matured':
        # '> 0' is tested first, so a value between 0 and 1 counts as 'Prepaid'.
        if term_left > 0:
            return 'Prepaid'
        if term_left < 1:
            return 'Closed'

    if status == 'Unavailable':
        return 'Active or other'

    return None
    

In [None]:
# Label every row of the stacked frame with get_target.
master['target'] = master.apply(get_target, axis = 1)

In [None]:
# Row count per target label (rows where get_target returned None are not counted).
master['target'].value_counts()

### Export

In [None]:
# Output location: '<term> transaction prepared.csv' under data/transaction/.
e_folder = 'data/transaction/'
e_file = '{} transaction prepared.csv'.format(term)
e_path = ''.join((e_folder, e_file))
e_path

In [None]:
# Write the prepared frame to e_path, leaving out the index column.
master.to_csv(e_path, index = False)

In [None]:
# Prints a fixed marker string; reaching this cell means the cells above it have run.
print('continue...')

### End