In [1]:
import pandas as pd
from datetime import timedelta

In [2]:
# Load the three source tables from CSV: accounts, account state transitions, and groups
accounts, ast, groups = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../Resources/accounts.csv",
        "../Resources/account_state_transitions.csv",
        "../Resources/groups.csv",
    )
)

## 4) Devise a method of generating a table of all Accounts with their “nominal” expected payments as of any arbitrary timestamp in the past. In other words, we’re looking for the amount the distributor would have collected as of the test date assuming the client had paid the minimum payment instantaneously upon going to a “disabled” state until reaching the unlock price.

## Merge Dataframes

In [3]:
# Attach each state transition to its account record (ast.account_id == accounts.id).
# The outer join keeps unmatched rows from either side; only the columns needed
# downstream are retained.
ast_accounts_merge = (
    ast
    .merge(accounts, how='outer', left_on='account_id', right_on='id')
    .loc[:, ['started_when', 'account_id', 'to_state', 'group_id']]
)
ast_accounts_merge.head()

Unnamed: 0,started_when,account_id,to_state,group_id
0,2019-11-06 05:49:39.571392,2,ENABLED,1
1,2019-11-25 22:20:59.150339,2,DISABLED,1
2,2019-11-26 10:22:36.571392,2,ENABLED,1
3,2019-11-29 11:03:32.150339,2,DISABLED,1
4,2019-12-03 10:08:03.571392,2,ENABLED,1


In [4]:
# Bring in the pricing terms of each account's group (group_id == groups.id), outer join
ast_accounts_groups_merge = ast_accounts_merge.merge(
    groups,
    how='outer',
    left_on='group_id',
    right_on='id',
)

# Discard the group columns that are not used by the expected-payment calculation
unused_group_columns = ['id', 'name', 'price_clock_hour']
aag_merge = ast_accounts_groups_merge.drop(columns=unused_group_columns)
aag_merge.head()

Unnamed: 0,started_when,account_id,to_state,group_id,price_upfront,price_unlock,minimum_payment
0,2019-11-06 05:49:39.571392,2,ENABLED,1,950,8100,71.5
1,2019-11-25 22:20:59.150339,2,DISABLED,1,950,8100,71.5
2,2019-11-26 10:22:36.571392,2,ENABLED,1,950,8100,71.5
3,2019-11-29 11:03:32.150339,2,DISABLED,1,950,8100,71.5
4,2019-12-03 10:08:03.571392,2,ENABLED,1,950,8100,71.5


## Limit by Indicated Timestamp

In [5]:
# Ask the user for the as-of date.
# Surrounding whitespace is stripped because this string is reused verbatim in the
# result column name and sliced with [:10] for the report title and output file name,
# so a stray leading/trailing space would corrupt those labels.
timestamp_str = input("Enter date with YYYY-MM-DD format.").strip()

Enter date with YYYY-MM-DD format.2020-03-01


In [6]:
# Convert the entered string to a pandas Timestamp and snap it to midnight, so that
# an accidental time component cannot push the cutoff into the following day.
timestamp_dt = pd.to_datetime(timestamp_str).floor('D')

# Move the cutoff to the last microsecond of that day.
# The transition timestamps carry fractional seconds (see the data previews), so a
# cutoff of exactly 23:59:59 would wrongly exclude events in the day's final second.
timestamp_dt = timestamp_dt + timedelta(days=1) - timedelta(microseconds=1)

In [7]:
# started_when is still text at this point; parse it into datetimes so it can be
# compared against the cutoff timestamp
started_when_parsed = pd.to_datetime(aag_merge['started_when'])
aag_merge['started_when'] = started_when_parsed

In [8]:
# Keep only transitions INTO the DISABLED state that started on or before the cutoff
started_by_cutoff = aag_merge['started_when'] <= timestamp_dt
entered_disabled = aag_merge['to_state'] == 'DISABLED'
aag_merge_date_limited = aag_merge.loc[started_by_cutoff & entered_disabled]

In [9]:
# Preview the first rows of the filtered transitions
aag_merge_date_limited.head(n=5)

Unnamed: 0,started_when,account_id,to_state,group_id,price_upfront,price_unlock,minimum_payment
1,2019-11-25 22:20:59.150339,2,DISABLED,1,950,8100,71.5
3,2019-11-29 11:03:32.150339,2,DISABLED,1,950,8100,71.5
5,2019-12-06 06:44:56.624024,2,DISABLED,1,950,8100,71.5
7,2019-12-12 11:56:56.518760,2,DISABLED,1,950,8100,71.5
9,2019-12-18 11:33:11.202971,2,DISABLED,1,950,8100,71.5


In [10]:
# One row per account: how many times it entered DISABLED by the cutoff, plus its
# pricing terms. The prices come from the account's group, so 'max' simply carries
# the (presumably constant per account) value through the aggregation.
# NOTE(review): accounts with no DISABLED transition on or before the cutoff have no
# rows in aag_merge_date_limited and therefore do not appear here — confirm intended.
aag_grouped_df = (
    aag_merge_date_limited
    .groupby('account_id', as_index=False)
    .agg(
        count_disabled=('to_state', 'count'),
        price_upfront=('price_upfront', 'max'),
        price_unlock=('price_unlock', 'max'),
        minimum_payment=('minimum_payment', 'max'),
    )
)
aag_grouped_df.head()

Unnamed: 0,account_id,count_disabled,price_upfront,price_unlock,minimum_payment
0,1,2,950,8100,71.5
1,2,14,950,8100,71.5
2,3,10,950,8100,71.5
3,4,8,950,8100,71.5
4,5,9,950,8100,71.5


In [11]:
# Calculate nominal expected payment:
# the upfront price plus one minimum payment for every time the account went DISABLED.
uncapped_payments = (
    aag_grouped_df['price_upfront']
    + aag_grouped_df['count_disabled'] * aag_grouped_df['minimum_payment']
)

# Payments stop once the unlock price is reached, so the total can never exceed it.
# NOTE(review): assumes price_unlock is the total price including the upfront
# payment — confirm against the pricing definition.
expected_payments = uncapped_payments.clip(upper=aag_grouped_df['price_unlock'])

In [12]:
# Make new dataframe with account_id and expected_payment.
# account_id is taken from aag_grouped_df, the frame expected_payments was computed
# from, so both columns share the same index and line up row for row.
expected_payments_df = pd.DataFrame({
    'account_id': aag_grouped_df['account_id'],
    f'expected_payment_as_of_{timestamp_str}': expected_payments,
})

# Preview dataframe
print(f"NOMINAL EXPECTED PAYMENTS BY ACCOUNT AS OF: {timestamp_str[:10]}")
expected_payments_df.head()

NOMINAL EXPECTED PAYMENTS BY ACCOUNT AS OF: 2020-03-01


Unnamed: 0,account_id,expected_payment_as_of_2020-03-01
0,1,1093.0
1,2,1951.0
2,3,1665.0
3,4,1522.0
4,5,1593.5


In [13]:
# Write the result table to a CSV named after the as-of date (row index omitted)
output_csv_path = f"../Output/nominal_expected_payments_as_of_{timestamp_str[:10]}.csv"
expected_payments_df.to_csv(output_csv_path, index=False)