In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import sys
import os
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))
from src.kpi_calculator import LoanKPICalculator
import warnings
warnings.filterwarnings('ignore')



In [2]:
# Read the cleaned loan data from the processed-data folder and report its size.
# NOTE(review): the preview includes an 'Unnamed: 0' column — presumably an index
# that was written out with the CSV; confirm before relying on or dropping it.
df = pd.read_csv('../data/processed/loan_data_clean.csv')
record_count, column_count = df.shape
print(f"Dataset loaded: {record_count} records, {column_count} columns")
df.head()

Dataset loaded: 35831 records, 30 columns


Unnamed: 0,id,address_state,application_type,emp_length,emp_title,grade,home_ownership,issue_date,last_credit_pull_date,last_payment_date,...,int_rate,loan_amount,total_acc,total_payment,loan_category,issue_year,issue_month,issue_month_name,income_bracket,dti_category
0,1077430,GA,INDIVIDUAL,< 1 year,Ryder,C,RENT,2021-05-07,2021-04-12,2021-07-13,...,0.1527,2500.0,4,1009.0,Bad Loan,2021,5,May,<30K,Low Risk
1,1072053,CA,INDIVIDUAL,9 years,MKC Accounting,E,RENT,2021-05-24,2021-09-19,2021-08-28,...,0.1864,3000.0,4,3939.0,Good Loan,2021,5,May,30-50K,Low Risk
2,1069243,CA,INDIVIDUAL,4 years,Chemat Technology Inc,C,RENT,2021-03-18,2021-06-07,2021-01-30,...,0.1596,12000.0,11,3522.0,Bad Loan,2021,3,March,30-50K,High Risk
3,1041756,TX,INDIVIDUAL,< 1 year,barnes distribution,B,MORTGAGE,2021-04-22,2021-03-24,2021-01-08,...,0.1065,4500.0,9,4911.0,Good Loan,2021,4,April,30-50K,Low Risk
4,1068350,IL,INDIVIDUAL,10+ years,J&J Steel Inc,A,MORTGAGE,2021-01-25,2021-06-26,2021-08-12,...,0.0603,3500.0,28,3835.0,Good Loan,2021,1,January,75-100K,Low Risk


In [3]:
# Sanity check: span of issue dates, then the class balance of both label columns.
first_issue, last_issue = df['issue_date'].min(), df['issue_date'].max()
print(f"Date range: {first_issue} to {last_issue}")

print("Loan status distribution:")
print(df['loan_status'].value_counts())

print("\nLoan category distribution:")
print(df['loan_category'].value_counts())

Date range: 2021-01-01 to 2021-12-31
Loan status distribution:
loan_status
Fully Paid     29989
Charged Off     4930
Current          912
Name: count, dtype: int64

Loan category distribution:
loan_category
Good Loan    30901
Bad Loan      4930
Name: count, dtype: int64


In [4]:
# Headline portfolio KPIs. The variables assigned here are reused by later cells
# (good/bad percentages, summary table, export), so their names must stay stable.
# The emoji in the labels were previously mis-encoded (UTF-8 bytes decoded as
# cp1252); they are restored to the intended characters.

# 1. Total Loan Applications (one row per application)
total_applications = len(df)
print(f"📊 Total Loan Applications: {total_applications:,}")

# 2. Total Funded Amount
total_funded = df['loan_amount'].sum()
print(f"💰 Total Funded Amount: ${total_funded:,.2f}")

# 3. Total Amount Received
total_received = df['total_payment'].sum()
print(f"📈 Total Amount Received: ${total_received:,.2f}")

# 4. Average Interest Rate
# int_rate is stored as a fraction (e.g. 0.1527), so the `%` format spec scales it by 100.
avg_interest_rate = df['int_rate'].mean()
print(f"📊 Average Interest Rate: {avg_interest_rate:.2%}")

# 5. Average Debt-to-Income Ratio (DTI)
# Formatted with `%` as well, i.e. dti is treated as a fraction here.
avg_dti = df['dti'].mean()
print(f"📋 Average DTI Ratio: {avg_dti:.2%}")

📊 Total Loan Applications: 35,831
💰 Total Funded Amount: $371,697,550.00
📈 Total Amount Received: $403,032,087.00
📊 Average Interest Rate: 11.90%
📋 Average DTI Ratio: 13.47%


In [5]:
# GOOD VS BAD LOAN KPIs
# Splits the portfolio on `loan_category` and reports count, share, funded amount
# and received amount for each side. The variables assigned here are reused by the
# summary-table and export cells, so their names must stay stable.
# The bullet characters in the labels were previously mis-encoded (UTF-8 decoded
# as cp1252); they are restored to the intended "•".
print("\n=== GOOD VS BAD LOAN KPIs ===\n")

# Separate good and bad loans.
# NOTE(review): the distributions printed earlier show Good Loan (30,901) equal to
# Fully Paid + Current, so loans still in progress appear to count as "good" — confirm.
good_loans = df[df['loan_category'] == 'Good Loan']
bad_loans = df[df['loan_category'] == 'Bad Loan']

# Good Loan KPIs
print("\n GOOD LOAN METRICS:")
good_loan_applications = len(good_loans)
# Share of all applications, on a 0-100 scale (not a fraction).
good_loan_percentage = (good_loan_applications / total_applications) * 100
good_loan_funded = good_loans['loan_amount'].sum()
good_loan_received = good_loans['total_payment'].sum()

print(f"   • Good Loan Application Percentage: {good_loan_percentage:.1f}%")
print(f"   • Good Loan Applications: {good_loan_applications:,}")
print(f"   • Good Loan Funded Amount: ${good_loan_funded:,.2f}")
print(f"   • Good Loan Total Received Amount: ${good_loan_received:,.2f}")

# Bad Loan KPIs
print("\n BAD LOAN METRICS:")
bad_loan_applications = len(bad_loans)
# Share of all applications, on a 0-100 scale (not a fraction).
bad_loan_percentage = (bad_loan_applications / total_applications) * 100
bad_loan_funded = bad_loans['loan_amount'].sum()
bad_loan_received = bad_loans['total_payment'].sum()

print(f"   • Bad Loan Application Percentage: {bad_loan_percentage:.1f}%")
print(f"   • Bad Loan Applications: {bad_loan_applications:,}")
print(f"   • Bad Loan Funded Amount: ${bad_loan_funded:,.2f}")
print(f"   • Bad Loan Total Received Amount: ${bad_loan_received:,.2f}")


=== GOOD VS BAD LOAN KPIs ===


 GOOD LOAN METRICS:
   • Good Loan Application Percentage: 86.2%
   • Good Loan Applications: 30,901
   • Good Loan Funded Amount: $316,942,400.00
   • Good Loan Total Received Amount: $371,953,728.00

 BAD LOAN METRICS:
   • Bad Loan Application Percentage: 13.8%
   • Bad Loan Applications: 4,930
   • Bad Loan Funded Amount: $54,755,150.00
   • Bad Loan Total Received Amount: $31,078,359.00


In [6]:
# MONTH-TO-DATE (MTD) CALCULATIONS
# Reports the headline KPIs for the most recent issue month present in the data.
print("\n=== MONTH-TO-DATE (MTD) ANALYSIS ===\n")

# Derive the "current" period from the data instead of hard-coding it: take the
# latest issue year, then the latest issue month within that year. For the loaded
# dataset (issue dates 2021-01-01 to 2021-12-31) this resolves to 12/2021, and it
# keeps working when newer data is loaded.
current_year = int(df['issue_year'].max())
current_month = int(df.loc[df['issue_year'] == current_year, 'issue_month'].max())
current_month_data = df[(df['issue_month'] == current_month) & (df['issue_year'] == current_year)]

print(f"Current Period: {current_month}/{current_year}")
print(f"MTD Loan Applications: {len(current_month_data):,}")
print(f"MTD Total Funded Amount: ${current_month_data['loan_amount'].sum():,.2f}")
print(f"MTD Total Amount Received: ${current_month_data['total_payment'].sum():,.2f}")
# int_rate and dti are formatted with `%`, i.e. treated as fractions.
print(f"MTD Average Interest Rate: {current_month_data['int_rate'].mean():.2%}")
print(f"MTD Average DTI: {current_month_data['dti'].mean():.2%}")



=== MONTH-TO-DATE (MTD) ANALYSIS ===

Current Period: 12/2021
MTD Loan Applications: 3,065
MTD Total Funded Amount: $32,164,450.00
MTD Total Amount Received: $34,845,862.00
MTD Average Interest Rate: 11.89%
MTD Average DTI: 13.44%


In [7]:
# Assemble the headline KPIs into a two-column table of (label, formatted value).
# Each row keeps its label next to its value so the pairing is visible at a glance.
# "Recovery Rate" is total received divided by total funded, so it can exceed 100%.
kpi_rows = [
    ('Total Loan Applications', f"{total_applications:,}"),
    ('Total Funded Amount ($)', f"${total_funded:,.2f}"),
    ('Total Amount Received ($)', f"${total_received:,.2f}"),
    ('Average Interest Rate (%)', f"{avg_interest_rate:.2%}"),
    ('Average DTI (%)', f"{avg_dti:.2%}"),
    ('Good Loan Applications', f"{good_loan_applications:,}"),
    ('Good Loan Percentage (%)', f"{good_loan_percentage:.1f}%"),
    ('Bad Loan Applications', f"{bad_loan_applications:,}"),
    ('Bad Loan Percentage (%)', f"{bad_loan_percentage:.1f}%"),
    ('Recovery Rate (%)', f"{(total_received/total_funded)*100:.1f}%"),
]
kpi_summary = pd.DataFrame(kpi_rows, columns=['KPI', 'Value'])

print(kpi_summary.to_string(index=False))


                      KPI           Value
  Total Loan Applications          35,831
  Total Funded Amount ($) $371,697,550.00
Total Amount Received ($) $403,032,087.00
Average Interest Rate (%)          11.90%
          Average DTI (%)          13.47%
   Good Loan Applications          30,901
 Good Loan Percentage (%)           86.2%
    Bad Loan Applications           4,930
  Bad Loan Percentage (%)           13.8%
        Recovery Rate (%)          108.4%


In [8]:
# EXPORT KPI DATA FOR POWER BI
# Writes the raw (unformatted) KPI values to a CSV for the Power BI dashboard.
# NOTE: units are mixed and left unchanged so the existing dashboard keeps working:
# 'Avg Interest Rate' and 'Avg DTI' are fractions (e.g. 0.119), while
# 'Good Loan %' and 'Bad Loan %' are already on a 0-100 scale.
kpi_data_export = {
    'KPI_Name': [
        'Total Applications', 'Total Funded', 'Total Received', 
        'Avg Interest Rate', 'Avg DTI', 'Good Loan %', 'Bad Loan %'
    ],
    'KPI_Value': [
        total_applications, total_funded, total_received,
        avg_interest_rate, avg_dti, good_loan_percentage, bad_loan_percentage
    ],
    'KPI_Category': [
        'Volume', 'Amount', 'Amount', 'Rate', 'Risk', 'Quality', 'Quality'
    ]
}

kpi_export_df = pd.DataFrame(kpi_data_export)

# Create the export folder if it is missing; to_csv does not create parent
# directories, so a fresh checkout would otherwise fail here.
export_path = '../data/exports/kpi_summary.csv'
os.makedirs(os.path.dirname(export_path), exist_ok=True)
kpi_export_df.to_csv(export_path, index=False)
print(" KPI data exported for Power BI")


 KPI data exported for Power BI
