In [3]:
from pyspark.sql import SparkSession
from pyspark.sql import functions as f
from pyspark.sql.window import Window
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_selection import chi2
import matplotlib.pyplot as plt
import pandas as pd
spark = SparkSession.builder \
    .appName("Analysis") \
    .getOrCreate()

### Loading datase

In [5]:
master_table= (
    spark.read.csv(
        "../dataset/New/credit_score_detail_3w_report.csv", 
        header=True,
        inferSchema=True
    )
    .select(
        "FAC NO",
        "Branch Name",
        "PRODUCT Name",
        "Equipment Category Description",
        "Scheme Description",
        "Make Description",
        "District Name",
        "Connex Date",
        "Lease Amount",
        "Portfolio",
        "Int Rate",
        "Future Capital",
        "TOTAL DUE",
        "TOTAL COLLECTION",
        "TOTAL COLLECTION RATIO",
        "Collection For The Month",
        "Collection Ratio",
        "DPD",
        "90>=day_contract",
        "Credit Decision",
        "Credit Score"
    )
)

In [6]:
master_table.show()

+-------------------+--------------------+--------------------+------------------------------+--------------------+----------------+-------------+-----------+------------+---------+--------+--------------+---------+----------------+----------------------+------------------------+----------------+----+----------------+---------------+------------+
|             FAC NO|         Branch Name|        PRODUCT Name|Equipment Category Description|  Scheme Description|Make Description|District Name|Connex Date|Lease Amount|Portfolio|Int Rate|Future Capital|TOTAL DUE|TOTAL COLLECTION|TOTAL COLLECTION RATIO|Collection For The Month|Collection Ratio| DPD|90>=day_contract|Credit Decision|Credit Score|
+-------------------+--------------------+--------------------+------------------------------+--------------------+----------------+-------------+-----------+------------+---------+--------+--------------+---------+----------------+----------------------+------------------------+----------------+----+

# 3W after automated 

In [23]:
after_automated_3w= (
    master_table
    .filter(f.col("Connex Date").between("2022-04-01","2023-04-01"))
)

In [24]:
after_automated_3w.agg(f.min("Connex Date"),f.max("Connex Date")).show()

+----------------+----------------+
|min(Connex Date)|max(Connex Date)|
+----------------+----------------+
|      2022-04-01|      2023-04-01|
+----------------+----------------+



#### Dividing the overall Score in to slabs (Credit score)



In [25]:
after_automated_3w_Credit_score = (
    after_automated_3w
    .select(
        "FAC NO",
        "Connex Date",
        "Portfolio",
        "TOTAL COLLECTION",
        "TOTAL DUE",
        "90>=day_contract",
        "Credit Decision",
        "Credit Score"
           )
    .withColumn("Overall_score_slab",
                f.when(f.col("Credit Score")<=40,"below_40")
               .when(f.col("Credit Score").between(40,49),"40-50")
                .when(f.col("Credit Score").between(50,59),"50-60")
                .when(f.col("Credit Score").between(60,69),"60-70")
                .when(f.col("Credit Score").between(70,79),"70-80")
                .when(f.col("Credit Score").between(80,89),"80-90")
                .otherwise("90_above")
        )
)

In [26]:
after_automated_3w_Credit_score.select("Overall_score_slab").distinct().show()

+------------------+
|Overall_score_slab|
+------------------+
|          below_40|
|             40-50|
|          90_above|
|             80-90|
|             60-70|
|             50-60|
|             70-80|
+------------------+



In [27]:
overall_score_slab=(
    after_automated_3w_Credit_score
     .groupBy("Overall_score_slab")
     .agg(
         f.count("FAC NO").alias("Facility Count"),
         f.sum("Portfolio").alias("Total Portfolio"),
         f.sum("TOTAL COLLECTION").alias("TOTAL COLLECTION"),
         f.sum("TOTAL DUE").alias("TOTAL DUE"),
         f.sum("90>=day_contract").alias("NPA")
     )
    .withColumn("Total Collection Ratio",f.col("TOTAL COLLECTION")/f.col("TOTAL DUE"))
    ).orderBy("Total Collection Ratio")

In [28]:
overall_score_slab.show()

+------------------+--------------+---------------+----------------+----------+---+----------------------+
|Overall_score_slab|Facility Count|Total Portfolio|TOTAL COLLECTION| TOTAL DUE|NPA|Total Collection Ratio|
+------------------+--------------+---------------+----------------+----------+---+----------------------+
|          below_40|          3919|     1944160177|      1090936239|1283514098|794|    0.8499604645558011|
|             50-60|           284|      147406316|       107325272| 126220114| 67|    0.8503024486255811|
|             60-70|           578|      274971386|       202585805| 233141731|121|    0.8689384098293411|
|             70-80|          1191|      492528592|       375466137| 427395965|204|    0.8784971495928839|
|             80-90|          1225|      453259987|       355654288| 393837902|161|    0.9030473862315059|
|             40-50|            18|        9776837|         9674026|  10545368|  3|    0.9173720632603812|
|          90_above|           461|  

In [29]:
overall_score_slab.agg(f.sum("Total Portfolio")).show()

+--------------------+
|sum(Total Portfolio)|
+--------------------+
|          3474077090|
+--------------------+



In [30]:
overall_score_slab.agg(f.sum("Facility Count")).show()

+-------------------+
|sum(Facility Count)|
+-------------------+
|               7676|
+-------------------+



In [31]:
overall_score_slab.agg(f.sum("NPA")).show()

+--------+
|sum(NPA)|
+--------+
|    1380|
+--------+



In [32]:
## after_automated_3w_Credit_score slabs 40-50
after_automated_3w_Credit_score_slab_40_to_50=(
    after_automated_3w_Credit_score
    .filter(f.col("Overall_score_slab")=="40-50")
)


In [33]:
after_automated_3w_Credit_score_slab_40_to_50.show()

+-------------------+-----------+---------+----------------+---------+----------------+---------------+------------+------------------+
|             FAC NO|Connex Date|Portfolio|TOTAL COLLECTION|TOTAL DUE|90>=day_contract|Credit Decision|Credit Score|Overall_score_slab|
+-------------------+-----------+---------+----------------+---------+----------------+---------------+------------+------------------+
|'020900816709050201| 2022-06-02|   159254|          584782|   584782|               0|         Yellow|        47.6|             40-50|
|'012500807287050201| 2022-04-20|   154346|          895566|   895566|               0|         Yellow|        48.6|             40-50|
|'005000804594050202| 2022-04-04|   526834|          536886|   536886|               0|         Yellow|       45.51|             40-50|
|'004400655002050201| 2022-05-23|   848128|          707400|   707400|               0|         Yellow|       49.77|             40-50|
|'002100807434050201| 2022-05-06|   300545|     

In [35]:
after_automated_3w_Credit_score.toPandas().to_csv("../output/20240126_final_output/credit_score_detail_3w_report_new.csv")

In [None]:
# Conclusion