# Loan Score Calculation

## Requirement 4

- The higher the loan score, the higher the chances of getting your loan approved, and vice versa
- 3 Major criteria to calculate loan score
  - loan repayment history (last payment, total payment received) - only 20% weight
    
  - loan defaulters history (delinq_2yrs - delinquencies in the last 2 years, pub_rec, pub_rec_bankruptcies, inq_last_6mths) - only 45%

  - financial health data (home_ownership, loan_status, funded amount, grade points) - only 35%


In [1]:
# Create spark session
import getpass
from pyspark.sql import SparkSession


# Account name interpolated into the Hive warehouse directory below.
username = "itv008299"

# Build (or reuse) a Hive-enabled Spark session running on YARN.
# The chain is wrapped in parentheses so each builder step sits on its own
# line without backslash continuations.
spark = (
    SparkSession.builder
    .config('spark.shuffle.useOldFetchProtocol', 'true')
    .config('spark.ui.port', '0')
    .config('spark.sql.warehouse.dir', f'/user/{username}/warehouse')
    .enableHiveSupport()
    .master('yarn')
    .getOrCreate()
)

## Associate points to the grades in order to calculate the loan score

In [2]:
# Points awarded to a single scoring attribute, ordered from worst to best.
(
    unacceptable_rated_pts,
    very_bad_rated_pts,
    bad_rated_pts,
    good_rated_pts,
    very_good_rated_pts,
    excellent_rated_pts,
) = (0, 100, 250, 500, 650, 800)

In [3]:
# Cut-offs on the aggregated loan score, ordered from lowest to highest.
# These are thresholds for the final letter grade, not per-attribute points.
(
    unacceptable_grade_pts,
    very_bad_grade_pts,
    bad_grade_pts,
    good_grade_pts,
    very_good_grade_pts,
) = (750, 1000, 1500, 2000, 2500)

## Tables required to calculate the loan score

- customers_new
- loans
- loan_repayments
- loan_def_delinq_new
- loan_def_detail_record_enq_new

## Loan Score Calculation Criteria 1: Payment History

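- last payment amount < monthly installment * 0.5, then very_bad_rated_pts
- last payment amount >= monthly installment * 0.5 and < monthly installment, then bad_rated_pts
- last payment amount = monthly installment, then good_rated_pts
- last payment amount > monthly installment and <= monthly installment * 1.5, then very_good_rated_pts
- last payment amount > monthly installment * 1.5, then excellent_rated_pts
- otherwise (e.g. missing values), then unacceptable_rated_pts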

In [4]:
# Load the CSV of customers flagged as bad data; the first row is treated as
# the header and column types are inferred by Spark.
bad_customer_data_df = (
    spark.read
    .format("csv")
    .options(header=True, inferSchema=True)
    .load("/user/itv008299/lendingclubproject/bad_data/bad_customer_data_final")
)

In [5]:
# Register the DataFrame as the temp view "bad_data_customer" so that
# Spark SQL queries can reference it by name.
bad_customer_data_df.createOrReplaceTempView("bad_data_customer")

In [10]:
# Criteria 1 - payment history.
# Joins loan_repayments to loans on loan_id and produces two scores per row:
#   last_payment_pts  - last payment amount compared with the monthly installment
#   total_payment_pts - total payment received compared with the funded amount
# Members listed in the bad_data_customer view are excluded.
#
# Tier notes:
#   * A last payment of at least half but less than the full installment earns
#     bad_rated_pts, which keeps it distinct from the "< half" tier
#     (very_bad_rated_pts).
#   * Both CASE expressions end in ELSE so the result is never NULL; a NULL
#     component would turn any sum built on these columns into NULL.
#   * member_id in the WHERE clause is qualified with the loans alias so the
#     filter stays unambiguous.
ph_df = spark.sql(f"""
    SELECT
        c.member_id,
        CASE
            WHEN p.last_payment_amount < (c.monthly_installment * 0.5)
                THEN {very_bad_rated_pts}
            WHEN p.last_payment_amount >= (c.monthly_installment * 0.5)
                 AND p.last_payment_amount < c.monthly_installment
                THEN {bad_rated_pts}
            WHEN p.last_payment_amount = c.monthly_installment
                THEN {good_rated_pts}
            WHEN p.last_payment_amount > c.monthly_installment
                 AND p.last_payment_amount <= (c.monthly_installment * 1.50)
                THEN {very_good_rated_pts}
            WHEN p.last_payment_amount > (c.monthly_installment * 1.50)
                THEN {excellent_rated_pts}
            ELSE {unacceptable_rated_pts}
        END AS last_payment_pts,
        CASE
            WHEN p.total_payment_received >= (c.funded_amount * 0.50)
                THEN {very_good_rated_pts}
            WHEN p.total_payment_received < (c.funded_amount * 0.50)
                 AND p.total_payment_received > 0
                THEN {good_rated_pts}
            WHEN p.total_payment_received = 0 OR p.total_payment_received IS NULL
                THEN {unacceptable_rated_pts}
            ELSE {unacceptable_rated_pts}
        END AS total_payment_pts
    FROM itv008299_lending_club.loan_repayments p
    INNER JOIN itv008299_lending_club.loans c ON c.loan_id = p.loan_id
    WHERE c.member_id NOT IN (SELECT member_id FROM bad_data_customer)
""")

In [20]:
# Register the payment-history scores as the temp view "ph_pts" for use in
# later Spark SQL queries.
ph_df.createOrReplaceTempView("ph_pts")

In [22]:
# Bare expression: the notebook renders the resulting DataFrame as cell output.
spark.sql("select * from ph_pts")

member_id,last_payment_pts,total_payment_pts
dcec9334e70f1cc95...,800,650
fc58ca61f51f9dcac...,500,650
2fb62a6ca51063b11...,500,650
488268a5531951622...,800,650
ade6026208e48f5f9...,500,650
7c8b0ca6acddfaeb1...,800,650
a707b7fe7c38bad65...,800,650
1df639cddea30c288...,800,650
22d67005e12d8726d...,500,650
009cf312bd46551b4...,500,650


## Loan Score Calculation Criteria 2: Loan Defaulters History (ldh)

In [21]:
# Criteria 2 - loan defaulters history.
# Carries forward every column of ph_pts and adds four scores:
#   delinq_pts              - from delinq_2yrs
#   public_records_pts      - from pub_rec
#   public_bankruptcies_pts - from pub_rec_bankruptcies
#   enq_pts                 - from inq_last_6mths
# Members listed in the bad_data_customer view are excluded.
#
# The worst delinquency tier (> 5 or NULL) awards unacceptable_rated_pts.
# It must not use unacceptable_grade_pts: that constant is a threshold on the
# final aggregated score, and using it here would rank the worst tier above
# very_good_rated_pts.
#
# NOTE(review): for pub_rec and pub_rec_bankruptcies the "> 5 or NULL" tier
# awards very_bad_rated_pts, the same as the 3-5 tier. Left unchanged -
# confirm whether unacceptable_rated_pts was intended there as well.
ldh_ph_df = spark.sql(f"""
    SELECT
        p.*,
        CASE
            WHEN d.delinq_2yrs = 0 THEN {excellent_rated_pts}
            WHEN d.delinq_2yrs BETWEEN 1 AND 2 THEN {bad_rated_pts}
            WHEN d.delinq_2yrs BETWEEN 3 AND 5 THEN {very_bad_rated_pts}
            WHEN d.delinq_2yrs > 5 OR d.delinq_2yrs IS NULL THEN {unacceptable_rated_pts}
        END AS delinq_pts,
        CASE
            WHEN l.pub_rec = 0 THEN {excellent_rated_pts}
            WHEN l.pub_rec BETWEEN 1 AND 2 THEN {bad_rated_pts}
            WHEN l.pub_rec BETWEEN 3 AND 5 THEN {very_bad_rated_pts}
            WHEN l.pub_rec > 5 OR l.pub_rec IS NULL THEN {very_bad_rated_pts}
        END AS public_records_pts,
        CASE
            WHEN l.pub_rec_bankruptcies = 0 THEN {excellent_rated_pts}
            WHEN l.pub_rec_bankruptcies BETWEEN 1 AND 2 THEN {bad_rated_pts}
            WHEN l.pub_rec_bankruptcies BETWEEN 3 AND 5 THEN {very_bad_rated_pts}
            WHEN l.pub_rec_bankruptcies > 5 OR l.pub_rec_bankruptcies IS NULL THEN {very_bad_rated_pts}
        END AS public_bankruptcies_pts,
        CASE
            WHEN l.inq_last_6mths = 0 THEN {excellent_rated_pts}
            WHEN l.inq_last_6mths BETWEEN 1 AND 2 THEN {bad_rated_pts}
            WHEN l.inq_last_6mths BETWEEN 3 AND 5 THEN {very_bad_rated_pts}
            WHEN l.inq_last_6mths > 5 OR l.inq_last_6mths IS NULL THEN {unacceptable_rated_pts}
        END AS enq_pts
    FROM itv008299_lending_club.loan_def_detail_record_enq_new l
    INNER JOIN itv008299_lending_club.loan_def_delinq_new d ON d.member_id = l.member_id
    INNER JOIN ph_pts p ON p.member_id = l.member_id
    WHERE l.member_id NOT IN (SELECT member_id FROM bad_data_customer)
""")

In [23]:
# Register the combined payment-history and defaulters-history scores as the
# temp view "ldh_ph_pts" for use in later Spark SQL queries.
ldh_ph_df.createOrReplaceTempView("ldh_ph_pts")

In [24]:
# Bare expression: the notebook renders the resulting DataFrame as cell output.
spark.sql("select * from ldh_ph_pts")

member_id,last_payment_pts,total_payment_pts,delinq_pts,public_records_pts,public_bankruptcies_pts,enq_pts
003e1e6cbd2920bbb...,500,650,250,250,250,800
005b4c3db3fce07dc...,500,650,250,250,800,250
00fc8144cb210ba8c...,500,650,250,250,250,800
017ce564dc0d6f975...,500,500,250,800,800,250
01b39bf7187932c4a...,800,650,250,250,800,800
021a6ce1b67f3bc81...,500,500,250,250,250,250
02844f1aff78c7121...,800,650,250,250,800,800
02ad95cbf42e6d4dd...,800,650,250,800,800,250
02f69de1384fcf78c...,100,650,250,800,800,250
032503cc8f86dea72...,500,500,250,800,800,250


## Loan Score Calculation Criteria 3: Financial Health Data

In [27]:
# Criteria 3 - financial health.
# Carries forward every column of ldh_ph_pts and adds four scores:
#   loan_status_pts  - from loans.loan_status (case-insensitive pattern match)
#   home_pts         - from customers_new.home_ownership
#   credit_limit_pts - funded_amount as a share of total_high_credit_limit
#   grade_pts        - grade sets the base points; each sub-grade step after
#                      the first takes a further 5% off (x1 = 100% ... x5 = 80%)
# Members listed in the bad_data_customer view are excluded.
#
# Every CASE ends in ELSE so no score can be NULL; a NULL component would make
# any sum built on these columns NULL as well.
#   * home_pts: an ownership value matching none of the patterns is scored
#     like 'any'/NULL (very_bad_rated_pts).
#   * grade_pts: a missing grade or a grade/sub_grade pair not listed below is
#     scored unacceptable_rated_pts, as in the other ELSE branches here.
fh_ldh_ph_df = spark.sql(f"""
    SELECT
        ldef.*,
        CASE
            WHEN LOWER(l.loan_status) LIKE '%fully paid%' THEN {excellent_rated_pts}
            WHEN LOWER(l.loan_status) LIKE '%current%' THEN {good_rated_pts}
            WHEN LOWER(l.loan_status) LIKE '%in grace period%' THEN {bad_rated_pts}
            WHEN LOWER(l.loan_status) LIKE '%late (16-30 days)%'
                 OR LOWER(l.loan_status) LIKE '%late (31-120 days)%' THEN {very_bad_rated_pts}
            WHEN LOWER(l.loan_status) LIKE '%charged off%' THEN {unacceptable_rated_pts}
            ELSE {unacceptable_rated_pts}
        END AS loan_status_pts,
        CASE
            WHEN LOWER(a.home_ownership) LIKE '%own' THEN {excellent_rated_pts}
            WHEN LOWER(a.home_ownership) LIKE '%rent' THEN {good_rated_pts}
            WHEN LOWER(a.home_ownership) LIKE '%mortgage' THEN {bad_rated_pts}
            WHEN LOWER(a.home_ownership) LIKE '%any'
                 OR LOWER(a.home_ownership) IS NULL THEN {very_bad_rated_pts}
            ELSE {very_bad_rated_pts}
        END AS home_pts,
        CASE
            WHEN l.funded_amount <= (a.total_high_credit_limit * 0.10) THEN {excellent_rated_pts}
            WHEN l.funded_amount > (a.total_high_credit_limit * 0.10)
                 AND l.funded_amount <= (a.total_high_credit_limit * 0.20) THEN {very_good_rated_pts}
            WHEN l.funded_amount > (a.total_high_credit_limit * 0.20)
                 AND l.funded_amount <= (a.total_high_credit_limit * 0.30) THEN {good_rated_pts}
            WHEN l.funded_amount > (a.total_high_credit_limit * 0.30)
                 AND l.funded_amount <= (a.total_high_credit_limit * 0.50) THEN {bad_rated_pts}
            WHEN l.funded_amount > (a.total_high_credit_limit * 0.50)
                 AND l.funded_amount <= (a.total_high_credit_limit * 0.70) THEN {very_bad_rated_pts}
            WHEN l.funded_amount > (a.total_high_credit_limit * 0.70) THEN {unacceptable_rated_pts}
            ELSE {unacceptable_rated_pts}
        END AS credit_limit_pts,
        CASE
            WHEN (a.grade) = 'A' AND (a.sub_grade) = 'A1' THEN {excellent_rated_pts}
            WHEN (a.grade) = 'A' AND (a.sub_grade) = 'A2' THEN ({excellent_rated_pts} * 0.95)
            WHEN (a.grade) = 'A' AND (a.sub_grade) = 'A3' THEN ({excellent_rated_pts} * 0.90)
            WHEN (a.grade) = 'A' AND (a.sub_grade) = 'A4' THEN ({excellent_rated_pts} * 0.85)
            WHEN (a.grade) = 'A' AND (a.sub_grade) = 'A5' THEN ({excellent_rated_pts} * 0.80)
            WHEN (a.grade) = 'B' AND (a.sub_grade) = 'B1' THEN ({very_good_rated_pts})
            WHEN (a.grade) = 'B' AND (a.sub_grade) = 'B2' THEN ({very_good_rated_pts} * 0.95)
            WHEN (a.grade) = 'B' AND (a.sub_grade) = 'B3' THEN ({very_good_rated_pts} * 0.90)
            WHEN (a.grade) = 'B' AND (a.sub_grade) = 'B4' THEN ({very_good_rated_pts} * 0.85)
            WHEN (a.grade) = 'B' AND (a.sub_grade) = 'B5' THEN ({very_good_rated_pts} * 0.80)
            WHEN (a.grade) = 'C' AND (a.sub_grade) = 'C1' THEN ({good_rated_pts})
            WHEN (a.grade) = 'C' AND (a.sub_grade) = 'C2' THEN ({good_rated_pts} * 0.95)
            WHEN (a.grade) = 'C' AND (a.sub_grade) = 'C3' THEN ({good_rated_pts} * 0.90)
            WHEN (a.grade) = 'C' AND (a.sub_grade) = 'C4' THEN ({good_rated_pts} * 0.85)
            WHEN (a.grade) = 'C' AND (a.sub_grade) = 'C5' THEN ({good_rated_pts} * 0.80)
            WHEN (a.grade) = 'D' AND (a.sub_grade) = 'D1' THEN ({bad_rated_pts})
            WHEN (a.grade) = 'D' AND (a.sub_grade) = 'D2' THEN ({bad_rated_pts} * 0.95)
            WHEN (a.grade) = 'D' AND (a.sub_grade) = 'D3' THEN ({bad_rated_pts} * 0.90)
            WHEN (a.grade) = 'D' AND (a.sub_grade) = 'D4' THEN ({bad_rated_pts} * 0.85)
            WHEN (a.grade) = 'D' AND (a.sub_grade) = 'D5' THEN ({bad_rated_pts} * 0.80)
            WHEN (a.grade) = 'E' AND (a.sub_grade) = 'E1' THEN ({very_bad_rated_pts})
            WHEN (a.grade) = 'E' AND (a.sub_grade) = 'E2' THEN ({very_bad_rated_pts} * 0.95)
            WHEN (a.grade) = 'E' AND (a.sub_grade) = 'E3' THEN ({very_bad_rated_pts} * 0.90)
            WHEN (a.grade) = 'E' AND (a.sub_grade) = 'E4' THEN ({very_bad_rated_pts} * 0.85)
            WHEN (a.grade) = 'E' AND (a.sub_grade) = 'E5' THEN ({very_bad_rated_pts} * 0.80)
            WHEN (a.grade) IN ('F', 'G') THEN ({unacceptable_rated_pts})
            ELSE {unacceptable_rated_pts}
        END AS grade_pts
    FROM ldh_ph_pts ldef
    INNER JOIN itv008299_lending_club.loans l ON ldef.member_id = l.member_id
    INNER JOIN itv008299_lending_club.customers_new a ON a.member_id = ldef.member_id
    WHERE ldef.member_id NOT IN (SELECT member_id FROM bad_data_customer)
""")

In [28]:
# Register the scores for all three criteria as the temp view "fh_ldh_ph_pts"
# for use in later Spark SQL queries.
fh_ldh_ph_df.createOrReplaceTempView("fh_ldh_ph_pts")

## Final loan score calculation, considering all 3 criteria with the following weights

1. payment history = 20%

2. Loan defaults = 45%

3. Financial health = 35%

In [30]:
# Weight the per-criterion point totals for each row of fh_ldh_ph_pts:
#   payment history    = (2 component scores summed) * 0.20
#   defaulters history = (4 component scores summed) * 0.45
#   financial health   = (4 component scores summed) * 0.35
# NOTE(review): the weights multiply raw sums, not averages, and payment
# history has only 2 components versus 4 for the others, so its share of the
# final score is smaller than 20% - confirm this is intended.
# A NULL in any component makes that criterion's weighted total NULL.
loan_score = spark.sql("""
SELECT 
    member_id,
    ((last_payment_pts+total_payment_pts)*0.20) as payment_history_pts,
    ((delinq_pts + public_records_pts + public_bankruptcies_pts + enq_pts) * 0.45) as defaulters_history_pts,
    ((loan_status_pts + home_pts + credit_limit_pts + grade_pts)*0.35) as financial_health_pts
FROM fh_ldh_ph_pts
""")

In [31]:
# Bare expression: the notebook renders the DataFrame as cell output.
loan_score

member_id,payment_history_pts,defaulters_history_pts,financial_health_pts
003e1e6cbd2920bbb...,230.0,697.5,766.5
005b4c3db3fce07dc...,230.0,697.5,619.5
00fc8144cb210ba8c...,230.0,697.5,717.5
017ce564dc0d6f975...,200.0,945.0,591.5
01b39bf7187932c4a...,290.0,945.0,939.75
021a6ce1b67f3bc81...,200.0,450.0,758.625
02844f1aff78c7121...,290.0,945.0,787.5
02ad95cbf42e6d4dd...,290.0,945.0,829.5
02f69de1384fcf78c...,150.0,945.0,864.5
032503cc8f86dea72...,200.0,945.0,647.5


In [32]:
# Total score = sum of the three weighted criterion columns.
final_loan_score = loan_score.withColumn(
    'loan_score',
    loan_score['payment_history_pts']
    + loan_score['defaulters_history_pts']
    + loan_score['financial_health_pts'],
)

In [33]:
# Register the totalled scores as the temp view "loan_score_eval" for use in
# later Spark SQL queries.
final_loan_score.createOrReplaceTempView("loan_score_eval")

In [35]:
# Map each row's loan_score to a letter grade using the *_grade_pts cut-offs:
#   A: score >  very_good_grade_pts
#   B: good_grade_pts         < score <= very_good_grade_pts
#   C: bad_grade_pts          < score <= good_grade_pts
#   D: very_bad_grade_pts     < score <= bad_grade_pts
#   E: unacceptable_grade_pts < score <= very_bad_grade_pts
#   F: score <= unacceptable_grade_pts
# There is no ELSE branch, so a NULL loan_score yields a NULL grade.
loan_score_final = spark.sql(f"""
select ls.*, 
case 
    WHEN loan_score > {very_good_grade_pts} THEN 'A' 
    WHEN loan_score <= {very_good_grade_pts} AND loan_score > {good_grade_pts} THEN 'B' 
    WHEN loan_score <= {good_grade_pts} AND loan_score > {bad_grade_pts} THEN 'C' 
    WHEN loan_score <= {bad_grade_pts} AND loan_score  > {very_bad_grade_pts} THEN 'D' 
    WHEN loan_score <= {very_bad_grade_pts} AND loan_score > {unacceptable_grade_pts} THEN 'E'  
    WHEN loan_score <= {unacceptable_grade_pts} THEN 'F' 
end as loan_final_grade 
from loan_score_eval ls""")

In [36]:
# Register the graded scores as the temp view "loan_final_table".
loan_score_final.createOrReplaceTempView("loan_final_table")

In [37]:
# Persist the graded loan scores as Parquet, replacing any existing output
# at the target path.
(
    loan_score_final.write
    .format("parquet")
    .mode("overwrite")
    .option("path", "/user/itv008299/lendingclubproject/processed/loan_score")
    .save()
)

In [38]:
# Shut down the Spark session now that the results have been written.
spark.stop()