In [1]:
import pandas as pd
import numpy as np

In [49]:
# Load the historical data provided by the client
data = pd.read_csv('data/loan_data.csv', delimiter=";")  

# Melt the data to reshape it into a long format
data = data.melt(id_vars=['Unnamed: 0', 'Origination Amount'], var_name='Date', value_name='Repayments')

# Rename columns for clarity
data.columns = ['vintage', 'origination_amount', 'date', 'repayment_amount']

#convert date 29.02.2020 to int
# data['date'] = data['date'].str.replace('.', '')
# data['date'] = data['date'].astype(int)

In [47]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 4 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   vintage             400 non-null    object 
 1   origination_amount  400 non-null    float64
 2   date                400 non-null    int32  
 3   repayment_amount    400 non-null    float64
dtypes: float64(2), int32(1), object(1)
memory usage: 11.1+ KB


In [48]:
data

Unnamed: 0,vintage,origination_amount,date,repayment_amount
0,31.05.2019,10018746.17,31052019,1443069.08
1,30.06.2019,10868379.04,31052019,0.00
2,31.07.2019,10733932.61,31052019,0.00
3,31.08.2019,12558727.02,31052019,0.00
4,30.09.2019,14505071.44,31052019,0.00
...,...,...,...,...
395,31.08.2020,25878820.95,31122020,1098593.34
396,30.09.2020,25887161.41,31122020,1115017.68
397,31.10.2020,27699586.46,31122020,1503544.68
398,30.11.2020,29872889.68,31122020,8383025.07


In [51]:
# # Compute the historical repayment percentages
# repayment_percentages = data.groupby('vintage')['repayment_amount'].sum() / data.groupby('vintage')['origination_amount'].sum()


# Convert the 'vintage' column to a datetime type
data['vintage'] = pd.to_datetime(data['vintage'], format='%d.%m.%Y')

# Extract the year or month from the 'vintage' column
data['vintage_year'] = data['vintage'].dt.year
data['vintage_month'] = data['vintage'].dt.month

# Compute the historical repayment percentages
repayment_percentages = data.groupby('vintage_year')['repayment_amount'].sum() / data.groupby('vintage_year')['origination_amount'].sum()


# Convert the indices of repayment_percentages to integers
repayment_percentages.index = repayment_percentages.index.map(int)

# Compute the expected repayment percentages
def compute_expected_repayment_percentages(repayment_percentages):
  """Computes the expected repayment percentages for a portfolio of merchant loans.

  Args:
    repayment_percentages: A Pandas Series containing the historical repayment percentages for each vintage.

  Returns:
    A Pandas Series containing the expected repayment percentages for each vintage.
  """

  expected_repayment_percentages = pd.Series()
  for vintage in repayment_percentages.index:
    if vintage == 1:
      expected_repayment_percentages[vintage] = repayment_percentages[vintage]
    elif vintage == 2:
      expected_repayment_percentages[vintage] = repayment_percentages[vintage] * 2
    else:
      expected_repayment_percentages[vintage] = max(repayment_percentages[vintage] * np.log(1 + (1 - vintage - 1) / 30) / (1 - float(sum(repayment_percentages[:vintage]))), 0)


  return expected_repayment_percentages

expected_repayment_percentages = compute_expected_repayment_percentages(repayment_percentages)

# Compute the forecasted cash flows
def compute_forecasted_cash_flows(origination_amounts, expected_repayment_percentages):
  """Computes the forecasted cash flows for a portfolio of merchant loans.

  Args:
    origination_amounts: A Pandas Series containing the origination amounts for each vintage.
    expected_repayment_percentages: A Pandas Series containing the expected repayment percentages for each vintage.

  Returns:
    A Pandas DataFrame containing the forecasted cash flows for each vintage and period.
  """

  forecasted_cash_flows = pd.DataFrame()
  for vintage in origination_amounts.index:
    vintage_forecasted_cash_flows = []
    for period in range(1, 31):
      vintage_forecasted_cash_flows.append(origination_amounts[vintage] * expected_repayment_percentages[vintage] * np.power(1 + 0.025 / 12, period - 1))

    forecasted_cash_flows[vintage] = vintage_forecasted_cash_flows

  return forecasted_cash_flows

forecasted_cash_flows = compute_forecasted_cash_flows(data.groupby('vintage')['origination_amount'].sum(), expected_repayment_percentages)

# Discount the forecasted cash flows to their present value
def discount_cash_flows(forecasted_cash_flows, discount_rate):
  """Discounts a series of cash flows to their present value.

  Args:
    forecasted_cash_flows: A Pandas DataFrame containing the forecasted cash flows for each vintage and period.
    discount_rate: The discount rate.

  Returns:
    A Pandas DataFrame containing the discounted cash flows for each vintage and period.
  """

  discounted_cash_flows = pd.DataFrame()
  for vintage in forecasted_cash_flows.index:
    vintage_discounted_cash_flows = []
    for period in range(1, 31):
      vintage_discounted_cash_flows.append(forecasted_cash_flows[vintage][period - 1] / np.power(1 + discount_rate, period))

    discounted_cash_flows[vintage] = vintage_discounted_cash_flows

  return discounted_cash_flows

discounted_cash_flows = discount_cash_flows(forecasted_cash_flows, 0.025)

# Compute the present value of the portfolio
portfolio_value = discounted_cash_flows.sum().sum()

# Print the results
print('Portfolio value:', portfolio_value)
print('Difference from client estimate:', portfolio_value - 84993122.67)


  expected_repayment_percentages = pd.Series()
  expected_repayment_percentages[vintage] = max(repayment_percentages[vintage] * np.log(1 + (1 - vintage - 1) / 30) / (1 - float(sum(repayment_percentages[:vintage]))), 0)


IndexError: index 2019 is out of bounds for axis 0 with size 0

In [11]:
data

Unnamed: 0.1,Unnamed: 0,Origination Amount,31.05.2019,30.06.2019,31.07.2019,31.08.2019,30.09.2019,31.10.2019,30.11.2019,31.12.2019,...,31.03.2020,30.04.2020,31.05.2020,30.06.2020,31.07.2020,31.08.2020,30.09.2020,31.10.2020,30.11.2020,31.12.2020
0,31.05.2019,10018746.17,1443069.08,3332200.33,1328138.75,928085.74,736418.27,539403.31,427557.86,324459.32,...,116684.68,92699.67,63399.66,53265.12,37121.13,29787.1,24524.9,18085.94,16581.01,11442.97
1,30.06.2019,10868379.04,0.0,1392751.6,3011884.91,1237868.7,970929.28,892351.83,668767.02,505612.59,...,255222.42,198833.96,161996.73,138461.91,92346.68,79641.3,63457.44,52373.85,43374.7,37404.87
2,31.07.2019,10733932.61,0.0,0.0,1537650.24,2953335.55,1208316.08,879375.19,711016.84,658251.4,...,302575.54,258652.52,191798.05,170027.54,127574.33,110301.21,89766.69,64746.84,61408.92,50312.7
3,31.08.2019,12558727.02,0.0,0.0,0.0,1617681.94,4082016.0,1387474.94,1247623.59,886293.35,...,417223.56,336686.08,253556.2,200066.59,151859.74,109973.0,90228.14,70661.5,53102.83,47069.84
4,30.09.2019,14505071.44,0.0,0.0,0.0,0.0,1992242.84,3930445.6,1394620.78,1227905.58,...,628429.48,589692.85,457299.31,323764.87,288152.28,239872.99,192246.98,171550.69,142575.97,116853.05
5,31.10.2019,15652952.2,0.0,0.0,0.0,0.0,0.0,2289453.76,4682354.31,1659503.89,...,763523.36,742787.97,558085.95,461806.22,358671.23,281881.11,241719.91,182730.05,144953.58,119260.1
6,30.11.2019,15107713.3,0.0,0.0,0.0,0.0,0.0,0.0,2162283.09,4637701.69,...,930720.35,697500.94,667277.73,547749.09,387987.02,309448.86,283876.04,215635.85,185516.45,141560.57
7,31.12.2019,17004745.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2402403.37,...,1245452.39,1116505.94,803590.21,724956.28,545397.33,458832.95,393971.01,333818.53,286831.44,216447.57
8,31.01.2020,16794379.95,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1505493.21,1182983.95,955821.48,846061.73,683116.81,560572.44,468203.76,349067.92,309854.99,267813.78
9,29.02.2020,19217205.82,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,6142911.08,1833677.81,1317065.75,1108494.37,918465.33,719913.69,587381.96,498801.31,371563.77,294941.22


In [28]:
# Melt the data to reshape it into a long format
data = data.melt(id_vars=['Unnamed: 0', 'Origination Amount'], var_name='Date', value_name='Repayments')

# Rename columns for clarity
data.columns = ['Vintage', 'Origination_Amount', 'Date', 'Repayments']

In [7]:
data

Unnamed: 0,Vintage,Origination_Amount,Date,Repayments
0,31.05.2019,10018746.17,31.05.2019,1443069.08
1,30.06.2019,10868379.04,31.05.2019,0.00
2,31.07.2019,10733932.61,31.05.2019,0.00
3,31.08.2019,12558727.02,31.05.2019,0.00
4,30.09.2019,14505071.44,31.05.2019,0.00
...,...,...,...,...
395,31.08.2020,25878820.95,31.12.2020,1098593.34
396,30.09.2020,25887161.41,31.12.2020,1115017.68
397,31.10.2020,27699586.46,31.12.2020,1503544.68
398,30.11.2020,29872889.68,31.12.2020,8383025.07


In [29]:
# Calculate historical repayment percentages
data['Repayment_Percentage'] = data['Repayments'] / data['Origination_Amount']

# Calculate expected repayment percentages
data['Expected_Repayment_Percentage'] = 0.0

for i in range(1, 31):  # For months 1 to 30
    if i == 1:
        data['Expected_Repayment_Percentage'][i] = data['Repayment_Percentage'][i]
    elif i == 2:
        data['Expected_Repayment_Percentage'][i] = 2 * data['Repayment_Percentage'][i - 1]
    else:
        p_j = data['Expected_Repayment_Percentage'][:i - 1].to_numpy()
        data['Expected_Repayment_Percentage'][i] = max(
            2 * data['Repayment_Percentage'][i - 1] * np.log(1 + 1 - (i - 1) / 30) / (1 - np.sum(p_j)), 0
        )


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Expected_Repayment_Percentage'][i] = data['Repayment_Percentage'][i]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Expected_Repayment_Percentage'][i] = 2 * data['Repayment_Percentage'][i - 1]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Expected_Repayment_Percentage'][i] = max(


In [21]:
# Calculate forecasted cash flows
data['Forecasted_Cash_Flow'] = data['Origination_Amount'] * data['Expected_Repayment_Percentage']


In [24]:
# Define the annual discount rate
annual_discount_rate = 0.025

# Convert the annual discount rate to a monthly rate
monthly_discount_rate = (1 + annual_discount_rate) ** (1 / 12) - 1

# Calculate the present value of forecasted cash flows
# data['Present_Value'] = data['Forecasted_Cash_Flow'] / (1 + monthly_discount_rate) ** (30 - data.index)

# Calculate the present value of forecasted cash flows
data['Present_Value'] = data['Forecasted_Cash_Flow'] / (1 + monthly_discount_rate) ** (30 - data.index)


ValueError: operands could not be broadcast together with shapes (400,) (0,) 

In [30]:
# # Step 2: Compute historical repayment percentages
# data['Repayment_Percentage'] = data['Repayments'] / data['Origination_Amount']

# # Step 3: Compute expected repayment percentages based on assumptions
# # First, calculate p2 for December 2020 vintage
# p1 = data.loc[(data['Vintage'] == '31.12.2020'), 'Repayment_Percentage'].values[0]
# p2 = 2 * p1

# # Calculate p3 to p30 based on the formula provided
# for i in range(3, 31):
#     pj = data.loc[(data.index < i - 1) & (data['Vintage'] != '31.12.2020'), 'Repayment_Percentage']
#     p2 = 2 * p1
#     pi = max(p2 * (np.log(1 + 1 - (i - 1) / 30) / (1 - np.sum(pj))), 0)
#     data.loc[(data.index == i - 1), 'Expected_Repayment_Percentage'] = pi

# Step 4: Compute forecasted cash flows using expected repayment percentages
data['Forecasted_Cash_Flow'] = data['Origination_Amount'] * data['Expected_Repayment_Percentage']

# Step 5: Compute the present value of forecasted cash flows
discount_rate_annual = 0.025  # 2.5% annual discount rate
discount_rate_monthly = (1 + discount_rate_annual) ** (1/12) - 1
data['Present_Value'] = data['Forecasted_Cash_Flow'] / ((1 + discount_rate_monthly) ** data.index)

# Step 6: Calculate the total portfolio value
portfolio_value = data.groupby('Vintage')['Present_Value'].sum().sum()

# Calculate the absolute and relative difference
client_estimate = 84993122.67  # Client's estimate
absolute_difference = abs(client_estimate - portfolio_value)
relative_difference = absolute_difference / client_estimate

# Determine if the difference is acceptable based on Jakob's threshold
threshold = 500000
acceptable_difference = absolute_difference <= threshold

# Output the results
print(f"Portfolio Value: CHF {portfolio_value:.2f}")
print(f"Absolute Difference: CHF {absolute_difference:.2f}")
print(f"Relative Difference: {relative_difference:.2%}")
print(f"Is the difference acceptable? {'Yes' if acceptable_difference else 'No'}")


Portfolio Value: CHF 2681688.63
Absolute Difference: CHF 82311434.04
Relative Difference: 96.84%
Is the difference acceptable? No


In [13]:

# Step 2: Compute historical repayment percentages
data['Repayment_Percentage'] = data['Repayments'] / data['Origination_Amount']

# Step 3: Compute expected repayment percentages based on assumptions
# First, calculate p2 for December 2020 vintage
p1 = data.loc[(data['Vintage'] == 'December 2020') & (data.index == 0), 'Repayment_Percentage'].values[0]
p2 = 2 * p1

# Calculate p3 to p30 based on the formula provided
for i in range(3, 31):
    pj = data.loc[(data.index < i - 1) & (data['Vintage'] != 'December 2020'), 'Repayment_Percentage']
    p2 = 2 * p1
    pi = max(p2 * (np.log(1 + 1 - (i - 1) / 30) / (1 - np.sum(pj))), 0)
    data.loc[(data.index == i - 1), 'Expected_Repayment_Percentage'] = pi

# Step 4: Compute forecasted cash flows using expected repayment percentages
data['Forecasted_Cash_Flow'] = data['Origination_Amount'] * data['Expected_Repayment_Percentage']

# Step 5: Compute the present value of forecasted cash flows
discount_rate_annual = 0.025  # 2.5% annual discount rate
discount_rate_monthly = (1 + discount_rate_annual) ** (1/12) - 1
data['Present_Value'] = data['Forecasted_Cash_Flow'] / ((1 + discount_rate_monthly) ** data.index)

# Step 6: Calculate the total portfolio value
portfolio_value = data.groupby('Vintage')['Present_Value'].sum().sum()

# Calculate the absolute and relative difference
client_estimate = 84993122.67  # Client's estimate
absolute_difference = abs(client_estimate - portfolio_value)
relative_difference = absolute_difference / client_estimate

# Determine if the difference is acceptable based on Jakob's threshold
threshold = 500000
acceptable_difference = absolute_difference <= threshold

# Output the results
print(f"Portfolio Value: CHF {portfolio_value:.2f}")
print(f"Absolute Difference: CHF {absolute_difference:.2f}")
print(f"Relative Difference: {relative_difference:.2%}")
print(f"Is the difference acceptable? {'Yes' if acceptable_difference else 'No'}")


IndexError: index 0 is out of bounds for axis 0 with size 0