In [22]:
import os
import importlib
import pandas as pd
import Python as py

# Re-import the `py` module so that edits made to its source since the first
# import are picked up without restarting the kernel.
# NOTE(review): `Python` is presumably a local helper module (Python.py) — confirm.
importlib.reload(py)

In [23]:
# Data files are located relative to the current working directory, so the
# notebook has to be run from the folder that contains the "data" directory.
root = os.getcwd()
# Load the scheduled and the actual repayment datasets from CSV.
df_scheduled = pd.read_csv(root + "/data/scheduled_loan_repayments.csv")
df_actual = pd.read_csv(root + "/data/actual_loan_repayments.csv")
# Build the combined frame used by the question functions below; they read at
# least the 'LoanID', 'ActualRepayment' and 'ScheduledRepayment' columns from it.
df_balances = py.calculate_df_balances(df_scheduled, df_actual)

# Question 1

In [None]:
def question_1(df_balances):
    """
    Calculate the percent of loans that defaulted as per the type 1 default definition.

    A loan is flagged as defaulted (type 1) if at least one of its rows has
    ActualRepayment < ScheduledRepayment.

    Args:
        df_balances (DataFrame): Dataframe created from the 'calculate_df_balances()' function.
            The columns 'LoanID', 'ActualRepayment' and 'ScheduledRepayment' are read.

    Returns:
        float: The percentage of type 1 defaulted loans (ie 50.0 not 0.5)

    """
    # Row-level flag: True where the repayment made was below the scheduled amount
    missed_repayment = df_balances['ActualRepayment'] < df_balances['ScheduledRepayment']

    # One flag per loan: True if any of its repayments was missed.
    # The built-in grouped `.any()` replaces `groupby().apply(..., include_groups=False)`,
    # which only works on pandas versions that know the `include_groups` keyword.
    loan_defaulted = missed_repayment.groupby(df_balances['LoanID']).any()

    # Share of defaulted loans, expressed as a percentage
    default_rate_percent = float(loan_defaulted.mean() * 100)

    return default_rate_percent

15.0

# Question 2

In [None]:
def question_2(df_scheduled, df_balances):
    """
    Calculate the percent of loans that defaulted as per the type 2 default definition.

    A loan is flagged as defaulted (type 2) when more than 15% of the amount
    scheduled for the year was left unpaid. Overpayments are not credited:
    a loan that paid more than scheduled simply has 0% unpaid.

    Args:
        df_scheduled (DataFrame): Dataframe created from the 'scheduled_loan_repayments.csv' dataset
        df_balances (DataFrame): Dataframe created from the 'calculate_df_balances()' function

    Returns:
        float: The percentage of type 2 defaulted loans (ie 50.0 not 0.5)

    """
    months_per_year = 12
    unpaid_threshold_percent = 15

    # Amount due over the year: the loan's first listed monthly instalment times 12
    monthly_instalment = df_scheduled.groupby('LoanID')['ScheduledRepayment'].first()
    amount_due = monthly_instalment * months_per_year

    # Amount actually paid: sum of every recorded repayment of the loan
    amount_paid = df_balances.groupby('LoanID')['ActualRepayment'].sum()

    # Shortfall per loan, floored at zero so overpaid loans do not go negative
    shortfall = (amount_due - amount_paid).clip(lower=0)

    # Shortfall relative to the amount due, as a percentage
    shortfall_percent = (shortfall / amount_due) * 100

    # Type 2 default: strictly more than the threshold left unpaid
    is_defaulted = shortfall_percent > unpaid_threshold_percent

    # Share of defaulted loans, as a percentage
    return is_defaulted.mean() * 100

# Evaluate the type 2 default rate on the loaded data; as the last expression
# of the cell, the returned percentage is shown as the cell output.
question_2(df_scheduled,df_balances)

1.2

# Question 3

In [None]:
def question_3(df_balances, balance_col='LoanBalanceStart', month_col='Month'):
    """
    Calculate the annualized portfolio CPR (as a %) from the geometric mean SMM.

    A conditional prepayment rate (CPR) is an estimate of the percentage of a
    loan pool's principal that is likely to be paid off prematurely.

    SMM is calculated per month as: (Unscheduled Principal)/(Start of Month Loan Balance),
    using portfolio totals for that month.
    SMM_mean is calculated as: (∏(1+SMM))^(1/n) - 1, where n is the number of
    months in the data (12 for a one-year dataset).
    CPR is calculated as: 1 - (1 - SMM_mean)^12

    Args:
        df_balances (DataFrame): Dataframe created from the 'calculate_df_balances()' function.
            The columns 'ActualRepayment' and 'ScheduledRepayment' are read, plus the
            two columns named by `balance_col` and `month_col`.
        balance_col (str): Name of the column holding the start-of-month loan balance.
            NOTE(review): the default 'LoanBalanceStart' is an assumption about the
            output of 'calculate_df_balances()' — confirm against that function.
        month_col (str): Name of the column identifying the month of each row.
            NOTE(review): the default 'Month' is an assumption — confirm.

    Returns:
        float: The annualized CPR of the loan portfolio as a percent.
            NaN if df_balances contains no rows.

    Raises:
        KeyError: If any of the required columns is missing from df_balances.

    """
    required_columns = ['ActualRepayment', 'ScheduledRepayment', balance_col, month_col]
    missing_columns = [col for col in required_columns if col not in df_balances.columns]
    if missing_columns:
        # Fail with an explicit message instead of an opaque lookup error further down
        raise KeyError(f"df_balances is missing required column(s): {missing_columns}")

    # Unscheduled principal is interpreted here as the amount repaid on top of the
    # scheduled repayment; shortfalls count as zero prepayment.
    # TODO confirm this matches the intended definition of "Unscheduled Principal".
    unscheduled_principal = (
        df_balances['ActualRepayment'] - df_balances['ScheduledRepayment']
    ).clip(lower=0)

    # Portfolio-level totals for every month
    unscheduled_by_month = unscheduled_principal.groupby(df_balances[month_col]).sum()
    balance_by_month = df_balances.groupby(month_col)[balance_col].sum()

    # Single monthly mortality rate per month
    smm = unscheduled_by_month / balance_by_month

    n_months = len(smm)
    if n_months == 0:
        # No data: the geometric mean is undefined
        return float('nan')

    # Geometric mean of the monthly rates
    smm_mean = (1 + smm).prod() ** (1 / n_months) - 1

    # Annualize the mean monthly rate
    cpr = 1 - (1 - smm_mean) ** 12
    cpr_percent = float(cpr * 100)

    return cpr_percent