In [1]:
import pandas as pd
from datetime import timedelta

In [2]:
# Load the four source tables from the Resources folder
accounts, ast, groups, payments = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../Resources/accounts.csv",
        "../Resources/account_state_transitions.csv",
        "../Resources/groups.csv",
        "../Resources/payments.csv",
    )
)

## Merge Dataframes

In [3]:
# Outer-join the state transitions to the accounts table (transition.account_id == account.id),
# then keep only the columns of interest
transition_account_cols = ['started_when','account_id','from_state','to_state','group_id','registration_date']
ast_accounts_merge = ast.merge(accounts, how='outer', left_on='account_id', right_on='id')[transition_account_cols]

In [4]:
# Outer-join the groups table onto the combined frame (group_id == group.id)
ast_accounts_groups_merge = ast_accounts_merge.merge(
    groups,
    how='outer',
    left_on='group_id',
    right_on='id',
)

# Discard the group columns that are not needed
group_cols_to_drop = ['id','name','price_clock_hour']
aag_merge = ast_accounts_groups_merge.drop(columns=group_cols_to_drop)

In [5]:
# Outer-join payments onto the combined frame by account_id
merged_df = aag_merge.merge(payments, how="outer", on='account_id')

# Remove unused columns and give the remaining ones descriptive names
column_renames = {
    'amount':'payment_amount',
    'started_when':'state_transition_date',
    'effective_when':'payment_date',
}
merged_df = merged_df.drop(columns=['id','currency']).rename(columns=column_renames)
# merged_df.head()

In [6]:
# Parse the three date columns into pandas datetimes
for date_col in ('state_transition_date', 'registration_date', 'payment_date'):
    merged_df[date_col] = pd.to_datetime(merged_df[date_col])

In [7]:
# max_num_payments = (price_unlock - price_upfront) / minimum_payment
balance_after_upfront = merged_df['price_unlock'].sub(merged_df['price_upfront'])
merged_df['max_num_payments'] = balance_after_upfront.div(merged_df['minimum_payment'])
# merged_df.head()

### How long does it typically take account holders to repay the unlock price, as a % of the nominal loan term?

In [8]:
# Split the transition log: rows that end in UNLOCKED, and rows that start from CREATED
is_unlock_transition = ast['to_state'].eq('UNLOCKED')
is_from_created = ast['from_state'].eq('CREATED')
unlocked_df = ast[is_unlock_transition]
created_df = ast[is_from_created]

# Account ids taken from the UNLOCKED transition rows
unlocked_accounts = unlocked_df['account_id'].tolist()

In [9]:
# Inner-join UNLOCKED rows to CREATED rows on account_id and keep the two timestamps;
# the _x suffix marks columns from unlocked_df, _y marks columns from created_df
created_unlocked = unlocked_df.merge(created_df, how="inner", on='account_id')[
    ['account_id','started_when_y','started_when_x']
]

In [10]:
# Give the two timestamp columns meaningful names
timestamp_names = {'started_when_y':'date_created', 'started_when_x':'date_unlock'}
created_unlocked = created_unlocked.rename(columns=timestamp_names)

# created_unlocked.head()

In [11]:
# Parse both timestamp columns into pandas datetimes
for ts_col in ('date_unlock', 'date_created'):
    created_unlocked[ts_col] = pd.to_datetime(created_unlocked[ts_col])

In [12]:
# Elapsed time from the CREATED transition to the UNLOCKED transition
created_unlocked['time_difference'] = created_unlocked['date_unlock'].sub(created_unlocked['date_created'])
# created_unlocked.head()

### "as a percentage of nominal loan term" --> number of actual payments / max_num_payments

In [13]:
# Number of payment rows per account, stored in a 'num_payments' column
payments_ct = (
    payments.groupby('account_id', as_index=False)['id']
    .count()
    .rename(columns={'id':'num_payments'})
)
# payments_ct.head()

In [14]:
# Keep only the payment counts that belong to unlocked accounts
belongs_to_unlocked = payments_ct['account_id'].isin(unlocked_accounts)
payments_ct_unlocked = payments_ct[belongs_to_unlocked]
# payments_ct_unlocked.head()

In [15]:
# Outer-join the per-account payment counts onto the created/unlocked frame
created_unlocked = created_unlocked.merge(payments_ct_unlocked, how='outer', on='account_id')
# created_unlocked.head()

#### Get max_num_payments

In [16]:
# Smallest max_num_payments value per account, restricted to unlocked accounts
min_term_by_account = merged_df.groupby('account_id', as_index=False)['max_num_payments'].min()
num_payments_grouped = min_term_by_account[min_term_by_account['account_id'].isin(unlocked_accounts)]
# created_unlocked.head()

In [17]:
# Outer-join max_num_payments onto the created/unlocked frame and preview the result
created_unlocked = created_unlocked.merge(num_payments_grouped, how='outer', on='account_id')
created_unlocked.head()

Unnamed: 0,account_id,date_created,date_unlock,time_difference,num_payments,max_num_payments
0,2,2019-11-06 05:49:39.571392,2020-02-24 02:00:21.571392,109 days 20:10:42,18,100.0
1,4,2019-12-11 03:18:53.571392,2020-02-25 04:28:59.571392,76 days 01:10:06,13,100.0
2,6,2019-09-29 13:04:45.571392,2020-02-20 18:54:47.571392,144 days 05:50:02,23,100.0
3,9,2019-10-10 12:49:10.571392,2020-03-04 07:45:50.571392,145 days 18:56:40,23,100.0
4,10,2019-12-15 03:38:31.571392,2020-02-21 15:48:49.571392,68 days 12:10:18,12,100.0


In [21]:
# Share of the nominal loan term actually used: num_payments / max_num_payments.
# The numeric ratio is kept in its own Series because the 'pct_loan_term' column
# below holds display strings (e.g. "18%"), which cannot be averaged, sorted
# numerically or otherwise aggregated to answer the "typical" question.
pct_loan_term_ratio = created_unlocked['num_payments'] / created_unlocked['max_num_payments']

# Format 'pct_loan_term' as a percentage string for display
# (rows with a missing count or term render as "nan%")
created_unlocked['pct_loan_term'] = pct_loan_term_ratio.map("{:.0%}".format)
created_unlocked.head(10)

Unnamed: 0,account_id,date_created,date_unlock,time_difference,num_payments,max_num_payments,pct_loan_term
0,2,2019-11-06 05:49:39.571392,2020-02-24 02:00:21.571392,109 days 20:10:42,18,100.0,18%
1,4,2019-12-11 03:18:53.571392,2020-02-25 04:28:59.571392,76 days 01:10:06,13,100.0,13%
2,6,2019-09-29 13:04:45.571392,2020-02-20 18:54:47.571392,144 days 05:50:02,23,100.0,23%
3,9,2019-10-10 12:49:10.571392,2020-03-04 07:45:50.571392,145 days 18:56:40,23,100.0,23%
4,10,2019-12-15 03:38:31.571392,2020-02-21 15:48:49.571392,68 days 12:10:18,12,100.0,12%
5,11,2019-09-22 00:50:52.571392,2020-02-22 10:21:48.571392,153 days 09:30:56,24,100.0,24%
6,12,2019-10-03 09:22:40.571392,2020-03-03 13:35:30.571392,152 days 04:12:50,24,100.0,24%
7,14,2019-07-14 10:38:51.571392,2020-02-29 01:21:59.571392,229 days 14:43:08,35,100.0,35%
8,16,2019-06-27 01:35:05.571392,2020-03-02 21:13:29.571392,249 days 19:38:24,38,100.0,38%
9,18,2019-10-03 09:21:49.571392,2020-03-04 14:41:14.571392,153 days 05:19:25,24,100.0,24%
