In [10]:
import os
import importlib
import pandas as pd
import Python as py
import numpy as np
importlib.reload(py)

<module 'Python' from '/app/Task_2/Python.py'>

In [18]:
root = os.getcwd()
df_scheduled = pd.read_csv(root + "/data/scheduled_loan_repayments.csv")
df_actual = pd.read_csv(root + "/data/actual_loan_repayments.csv")
df_balances = py.calculate_df_balances(df_scheduled, df_actual)

# Question 1

In [4]:
def question_1(df_balances):
    """
    Calculate the percent of loans that defaulted as per the type 1 default definition.

    Args:
        df_balances (DataFrame): Dataframe created from the 'calculate_df_balances()' function

    Returns:
        float: The percentage of type 1 defaulted loans (ie 50.0 not 0.5)

    """
    
    
    def check_repayments(group):
        """
        Check if there was any missed repayment in the loan's history.

        A loan is flagged as defaulted (type 1) if at least one row has ActualRepayment < ScheduledRepayment.

        Args:
            group (DataFrame): A group of repayments for a single loan (LoanID).

        Returns:
            bool: True if defaulted, False otherwise.
        """
        return (group['ActualRepayment'] < group['ScheduledRepayment']).any()

    # Apply loan default type 1 check to each set of repayments for each loan
    df = df_balances.groupby('LoanID').apply(check_repayments, include_groups=False)
    
    # Calculate default type 1 rate as percentage
    default_rate_percent = df.mean()*100

    return default_rate_percent

question_1(df_balances)

15.0

# Question 2

In [5]:
def question_2(df_scheduled, df_balances):
    """
    Calculate the percent of loans that defaulted as per the type 2 default definition

    Args:
        df_balances (DataFrame): Dataframe created from the 'calculate_df_balances()' function
        df_scheduled (DataFrame): Dataframe created from the 'scheduled_loan_repayments.csv' dataset

    Returns:
        float: The percentage of type 2 defaulted loans (ie 50.0 not 0.5)

    """

    # Calculate the total repayment required per loan per year (monthly payment * 12)
    scheduled_total = df_scheduled.groupby('LoanID')['ScheduledRepayment'].first() * 12 # 12 months in a year

    # Calculate the actual repayment for each loan (there are only 12 months represented in the data)
    actual_total = df_balances.groupby('LoanID')['ActualRepayment'].sum() # Total payments

    # Calculate the difference
    unpaid = scheduled_total - actual_total

    # Clip instances where actual payment exceeded scheduled - overpayed
    unpaid = unpaid.clip(lower=0)

    # Calculate for each loan the percentage unpaid
    unpaid_percentage = (unpaid / scheduled_total) * 100

    # Identify defaulted loans (Type 2) (unpaid > 15%)
    default = unpaid_percentage > 15

    # Calculate default type 2 rate as percentage
    default_rate_percent = default.mean() * 100

    return default_rate_percent

question_2(df_scheduled,df_balances)

1.2

# Question 3

In [32]:
def question_3(df_balances):
    """
    Calculate the anualized portfolio CPR (As a %) from the geometric mean SMM.
    SMM is calculated as: (Unscheduled Principal)/(Start of Month Loan Balance)
    SMM_mean is calculated as (∏(1+SMM))^(1/12) - 1
    CPR is calcualted as: 1 - (1- SMM_mean)^12

    Definitions: (Had Homework to do) from: https://www.investopedia.com/terms/a/amortization.asp
    Unscheduled principal refers to any amount paid towards the principal balance of a loan that exceeds the regular, scheduled payments. 
    Scheduled Principal: This is the regular monthly payment of principal that a borrower is required to make according to the loan agreement. 
    Unscheduled Principal: This is any additional amount paid towards the principal balance beyond the scheduled payment.
    Principle Balance: The principal balance of a loan is the outstanding amount of money you still owe, excluding interest and fees

    To calculate Unsheduled Principle:
    1. Identify the Scheduled Principal:
    This is the regular, required amount of principal that the borrower is obligated to pay each month. - ScheduledRepayment
    2. Identify the Actual Principal Paid:
    This is the total amount of principal paid in the period, including any extra amounts paid beyond the scheduled amount. 
    3. Calculate the Unscheduled Principal:
    Subtract the scheduled principal from the actual principal paid.

    Args:
        df_balances (DataFrame): Dataframe created from the 'calculate_df_balances()' function

    Returns:
        float: The anualized CPR of the loan portfolio as a percent.

    """
    # Copying df_balances to a different frame
    df = df_balances.copy()

    # Principal Payment = Total Monthly Payment - (Outstanding Loan Balance * (Interest Rate / 12)) => InterestPayment
    df['PrincipalPayment'] = df['ActualRepayment'] - df['InterestPayment']

    # Payment towards interest is included in the sheduled montly payment
    # Scheduled principal = scheduled total repayment - interest portion
    df['ScheduledPrincipal'] = df['ScheduledRepayment'] - df['InterestPayment']

    # Unscheduled principal = any additional payment towards principal
    df['UnscheduledPrincipal'] = df['PrincipalPayment'] - df['ScheduledPrincipal']

    # Calculate SMM
    df['SMM'] = df['UnscheduledPrincipal'] / df['LoanBalanceStart']

    # Calculate geometric mean of (1 + SMM)
    smm_product = (1 + df['SMM']).prod()

    smm_mean = smm_product ** (1 / 12) - 1

    # Calculate CPR as annualized prepayment rate
    cpr = 1 - (1 - smm_mean) ** 12
    cpr_percent = cpr * 100

    return cpr_percent

question_3(df_balances)

-229010586008.01517

# Question 4

In [36]:
def question_4(df_balances):
    """
    Calculate the predicted total loss for the second year in the loan term.
    Use the equation: probability_of_default * total_loan_balance * (1 - recovery_rate).
    The probability_of_default value must be taken from either your question_1 or question_2 answer.
    Decide between the two answers based on which default definition you believe to be the more useful metric.
    Assume a recovery rate of 80%

    Definitions: (More homework)
    Recovery Rate: The recovery rate is the principal and accrued interest on defaulted debt that can be recovered, expressed as a percentage of face value.
    Loan Balance: The total loan balance refers to the unpaid principal amount of a loan plus any accrued interest and other fees.

    Args:
        df_balances (DataFrame): Dataframe created from the 'calculate_df_balances()' function

    Returns:
        float: The predicted total loss for the second year in the loan term.

    """

    default_percentage = question_1(df_balances)/100

    total_balances = (df_balances.groupby('LoanID')['LoanBalanceEnd'].last()).sum()

    total_loss = default_percentage * total_balances * (1-0.8)

    return total_loss


question_4(df_balances)

979573.1690999997