# **Predictive Default Risk Assessor V.01**

# TODO

* Base model 
* Comparison
* Specialised
* For small entities - Examples?
* Backtest - All sectors 
* Understanding the model across all sectors/industries
* Any markets - consumer goods, industries
* UI last step after backtesting

In [31]:
import pandas as pd
import numpy as np

In [32]:
# Scoring configuration consumed by CreditRatingCalculator.
# Layout: category -> {"class_weight", "weights", "metrics"}.
#   * "class_weight": weight of the category in the final score
#     (0.30 + 0.55 + 0.15 = 1.0 across the three categories).
#   * "weights": per-metric weights, matched positionally to the order of the
#     entries in "metrics".
#   * "thresholds": ordered buckets; the 1-based position of the first bucket
#     that matches is the metric's score. Only the second element of a pair is
#     compared when "lower_is_better" is True, only the first element otherwise.
model_inputs = {
    "profitability": {
        "class_weight": 0.30,
        "weights": [1.0], 
        "metrics": {
            "oper_margin": {
                "lower_is_better": False,
                # NOTE(review): unlike every other metric, these pairs are
                # written (higher, lower). Because only the first element is
                # compared for a higher-is-better metric, the effective cut-offs
                # are >=50, >=45, ..., >=15, >=5. There is no bucket starting at
                # 10, and values below 5 match nothing, so they receive the
                # calculator's fallback score (len(thresholds) // 2 = 4).
                # Confirm whether this is intended.
                "thresholds": [
                    (50, 45),
                     (45, 40),
                     (40, 35),
                     (35, 30),
                     (30, 25),
                     (25, 20),
                     (20, 15),
                     (15, 10),
                     (5, 0)]
            }
        },
    },
    "leverage_coverage": {
        "class_weight": 0.55,
        "weights": [0.3, 0.2, 0.5],
        "metrics": {
            "tot_debt_to_tot_eqy": {
                "lower_is_better": True,
                "thresholds": [
                     (float("-inf"), 2.0),
                     (2.0, 16.0),
                     (16.0, 24.0),
                     (24.0, 33.0),
                     (33.0, 43.0),
                     (43.0, 54.0),
                     (54.0, 68.0),
                     (68.0, 94.0),
                     (94.0, float("inf")),
                ],
            },
            "tot_debt_to_ebitda": {
                "lower_is_better": True,
                "thresholds": [
                    (float("-inf"), 0.09),
                    (0.09, 0.49),
                    (0.49, 0.9),
                    (0.9, 1.36),
                    (1.36, 1.68),
                    (1.68, 2.26),
                    (2.26, 3.27),
                    (3.27, 4.4),
                    (4.4, float("inf")),
                ],
            },
            "ebitda_to_tot_int_exp": {
                "lower_is_better": False,
                "thresholds": [
                    (25, float("inf")),
                    (20, 25),
                    (15, 20),
                    (10, 15),
                    (5, 10),
                    (3, 5),
                    (1, 3),
                    (0, 1),
                    (float("-inf"), 0),
                ],
            },
        },
    },
    "efficiency": {
        "class_weight": 0.15,
        "weights": [0.3, 0.7],
        "metrics": {
            "return_on_asset": {
                "lower_is_better": False,
                # NOTE(review): these cut-offs look like fractions (0.15 = 15%),
                # while the describe() output recorded in this notebook shows
                # return_on_asset with a mean of about 3.13 and a max of about
                # 21.3, which suggests percent units. Confirm the unit of the
                # input data.
                "thresholds": [
                    (0.15, float("inf")),
                    (0.10, 0.15),
                    (0.08, 0.10),
                    (0.06, 0.08),
                    (0.04, 0.06),
                    (0.02, 0.04),
                    (0.00, 0.02),
                    (-0.02, 0.00),
                    (float("-inf"), -0.02)
                ],
            },
            "asset_turnover": {
                "lower_is_better": False,
                "thresholds": [
                    (4.0, float("inf")),
                    (3.0, 4.0),
                    (2.0, 3.0),
                    (1.5, 2.0),
                    (1.0, 1.5),
                    (0.75, 1.0),
                    (0.5, 0.75),
                    (0.25, 0.5),
                    (float("-inf"), 0.25)
                ],
            },
        },
    },
}

In [33]:
# Ratio columns the scoring model reads from `features`. Each name is also a
# metric key in `model_inputs`, listed in the same order.
model_metrics = [
    'oper_margin', 'tot_debt_to_tot_eqy', 'tot_debt_to_ebitda',
    'ebitda_to_tot_int_exp', 'return_on_asset', 'asset_turnover',
]

In [34]:
class CreditRatingCalculator:
    """Score a company's financial ratios and map the result to a letter rating.

    ``metrics`` is a nested configuration keyed by category name. Each
    category holds:

    * ``"class_weight"`` - weight of the category in the final score;
    * ``"weights"`` - per-metric weights, in the same order as ``"metrics"``;
    * ``"metrics"`` - mapping of ratio name to a dict with
      ``"lower_is_better"`` (bool) and ``"thresholds"`` (ordered
      ``(lower, upper)`` pairs).

    Lower scores are better. After :meth:`calculate_credit_rating` runs, the
    results are available as ``scores`` (per category), ``credit_score``
    (weighted total) and ``credit_rating`` (letter).
    """

    # Inclusive upper score bound for each rating, ordered best to worst.
    _RATING_BOUNDS = (
        ("Aaa", 2.5),
        ("Aa", 3.5),
        ("A", 4.5),
        ("Baa", 5.5),
        ("Ba", 6.5),
        ("B", 7.5),
        ("Caa", 8.5),
        ("Ca", 9.5),
        ("C", float("inf")),
    )

    def __init__(self, metrics):
        self.metrics = metrics
        # Filled in by calculate_credit_rating(); None until it has run.
        self.scores = None
        self.credit_score = None
        self.credit_rating = None

    @staticmethod
    def _is_missing(value):
        """Return True when *value* is None or NaN."""
        # NaN is the only value that compares unequal to itself.
        return value is None or value != value

    def _calculate_metric_score(self, metric, thresholds, inverse):
        """Return the 1-based position of the first bucket *metric* falls in.

        Args:
            metric: The ratio value to score.
            thresholds: Ordered ``(lower, upper)`` pairs.
            inverse: True when lower values are better; the value is then
                compared against each ``upper`` bound, otherwise against each
                ``lower`` bound.

        Returns:
            The bucket position, or ``len(thresholds) // 2`` when the value is
            missing (None/NaN) or matches no bucket.
        """
        # NOTE(review): for nine buckets this fallback is 4, not the exact
        # middle (5); kept as-is so existing model outputs do not shift.
        fallback = len(thresholds) // 2
        if self._is_missing(metric):
            # NaN never satisfies a comparison, so say so explicitly; None
            # would otherwise raise TypeError in the comparisons below.
            return fallback

        for score, (lower, upper) in enumerate(thresholds, start=1):
            matched = metric <= upper if inverse else metric >= lower
            if matched:
                return score
        return fallback

    def _calculate_category_score(self, category_metrics, ratios):
        """Return the weighted sum of metric scores for one category.

        Raises:
            ValueError: If the number of weights differs from the number of
                metrics (``zip`` would otherwise silently drop the surplus).
            KeyError: If ``ratios`` lacks a configured metric.
        """
        metric_specs = category_metrics["metrics"]
        weights = category_metrics["weights"]
        if len(weights) != len(metric_specs):
            raise ValueError(
                f"expected {len(metric_specs)} weights for metrics "
                f"{list(metric_specs)}, got {len(weights)}"
            )

        return sum(
            weight
            * self._calculate_metric_score(
                ratios[name], spec["thresholds"], spec["lower_is_better"]
            )
            for (name, spec), weight in zip(metric_specs.items(), weights)
        )

    def _calculate_scores(self, ratios):
        """Return ``{category: category_score}`` for every configured category."""
        return {
            category: self._calculate_category_score(category_data, ratios)
            for category, category_data in self.metrics.items()
        }

    def _calculate_weighted_score(self, scores):
        """Combine category scores using each category's ``class_weight``."""
        return sum(
            scores[category] * category_data["class_weight"]
            for category, category_data in self.metrics.items()
        )

    def _determine_credit_rating(self, weighted_score):
        """Return the first rating whose inclusive upper bound covers the score.

        Raises:
            ValueError: If ``weighted_score`` is None or NaN. Previously a NaN
                score leaked a bare ``StopIteration`` out of ``next()``.
        """
        if self._is_missing(weighted_score):
            raise ValueError(
                f"weighted_score must be a number, got {weighted_score!r}"
            )
        # The final bound is +inf, so every remaining number finds a match.
        return next(
            rating
            for rating, upper_bound in self._RATING_BOUNDS
            if weighted_score <= upper_bound
        )

    def calculate_credit_rating(self, ratios):
        """Score *ratios*, store the results on the instance and return the rating.

        Args:
            ratios: Mapping of metric name to value; must contain every metric
                named in the configuration.

        Returns:
            The letter rating (also stored as ``self.credit_rating``).
        """
        self.scores = self._calculate_scores(ratios)
        self.credit_score = self._calculate_weighted_score(self.scores)
        self.credit_rating = self._determine_credit_rating(self.credit_score)
        return self.credit_rating

In [35]:
def determine_credit_rating(weighted_score):
    """Map a weighted model score to a letter credit rating.

    Lower scores are better. The bounds are checked from best to worst and a
    score strictly below a bound receives that bound's rating.

    NOTE(review): the bounds are exclusive here, whereas
    ``CreditRatingCalculator._determine_credit_rating`` treats the same
    numbers as inclusive, so a score of exactly 2.5 is "Aa" here and "Aaa"
    there. Confirm which is intended.

    Args:
        weighted_score: Numeric model score.

    Returns:
        One of "Aaa", "Aa", "A", "Baa", "Ba", "B", "Caa", "Ca" or "C". Every
        score of 9.5 or more is rated "C".

    Raises:
        ValueError: If ``weighted_score`` is None or NaN.
    """
    # NaN is the only value that compares unequal to itself.
    if weighted_score is None or weighted_score != weighted_score:
        raise ValueError(
            f"weighted_score must be a number, got {weighted_score!r}"
        )

    upper_bounds = [
        (2.5, "Aaa"),
        (3.5, "Aa"),
        (4.5, "A"),
        (5.5, "Baa"),
        (6.5, "Ba"),
        (7.5, "B"),
        (8.5, "Caa"),
        (9.5, "Ca"),
    ]

    for threshold, rating in upper_bounds:
        if weighted_score < threshold:
            return rating

    # "C" is the worst rating and has no upper limit. The previous cap of 10
    # let scores >= 10 fall off the end of the function and return None.
    return "C"

In [56]:
# Load the ratio table and normalise its column names to lower case so they
# match the metric names used by the model; then load the reference ratings.
# Both sheets use their first column as the index.
features = pd.read_excel("dataset/features.xlsx", index_col=0)
features.columns = features.columns.str.lower()
targets = pd.read_excel("dataset/target.xlsx", index_col=0)

In [37]:
# Summary statistics for the ratio columns the scoring model uses.
features[model_metrics].describe()

Unnamed: 0,oper_margin,tot_debt_to_tot_eqy,tot_debt_to_ebitda,ebitda_to_tot_int_exp,return_on_asset,asset_turnover
count,70.0,70.0,61.0,51.0,70.0,70.0
mean,18.470457,181.766303,4.716249,5.665463,3.134256,0.606143
std,22.33447,437.735333,7.996142,5.156308,6.785515,0.66352
min,-57.6696,4.3685,0.1387,-0.5927,-24.4027,0.0303
25%,5.59605,47.506775,1.8845,1.79425,0.8307,0.188175
50%,16.20665,78.14185,3.3664,3.52,2.30485,0.4853
75%,31.520875,149.426875,5.0673,8.35025,6.0902,0.688875
max,64.345,3272.6638,61.124,24.3114,21.3106,3.8526


In [38]:
import joblib
import pandas as pd

# Score every company in `features` with the rule-based model and collect the
# per-company results into `study` (one row per company after the transpose).
studies = {}
study = {}

# Each calculate_credit_rating() call overwrites the previous results on the
# instance, so one calculator can be reused for all companies.
model = CreditRatingCalculator(model_inputs)

# Rows with missing ratios are deliberately kept (no dropna on `features`).
for company in features.index:
    ratios = features.loc[company][model_metrics].to_dict()
    model.calculate_credit_rating(ratios)

    credit_score, credit_rating = model.credit_score, model.credit_rating

    study[company] = dict(
        model_class_scores=model.scores,
        model_inputs=ratios,
        model_credit_ratings=credit_rating,
        model_credit_scores=float(credit_score),
    )

# The dict is keyed by company, so transpose to get companies as rows.
study = pd.DataFrame(study).T


In [39]:
from sklearn.metrics import mean_squared_error

In [40]:
# Mean squared error between the model's credit scores and the reference
# numeric ratings, with the ratings looked up in the row order of `features`.
mean_squared_error(
    study['model_credit_scores'],
    targets.loc[features.index, 'numeric_rating'],
)

1.2683428571428572

In [49]:
# Attach the reference numeric rating for each scored company.
study['target'] = targets.loc[features.index, 'numeric_rating']

In [53]:
# Attach the reference letter rating for each scored company.
study['rating'] = targets.loc[features.index, "rating"]

In [55]:
# Display the full feature table (all columns, not only the model ratios).
features

Unnamed: 0,asset_turnover,bs_lt_borrow,bs_st_borrow,bs_total_liabilities,bs_tot_asset,cfo_to_tot_debt,ebitda,ebitda_to_interest_expn,ebitda_to_revenue,ebitda_to_tot_int_exp,...,return_on_cap,return_on_inv_capital,sales_growth,sales_rev_turn,short_and_long_term_debt,total_equity,tot_debt_to_ebitda,tot_debt_to_tot_asset,tot_debt_to_tot_cap,tot_debt_to_tot_eqy
AGL SJ Equity,0.4577,2.770832e+05,31777.2720,6.378648e+05,1.215280e+06,0.3649,121616.1358,4.7608,21.4962,3.4697,...,3.8440,3.3318,-12.7171,5.657577e+05,3.088605e+05,5.774149e+05,2.5667,25.4148,34.8493,53.4902
ANG SJ Equity,0.5640,3.889976e+04,5113.5840,8.099552e+04,1.492984e+05,0.4531,18864.5579,9.6807,25.5943,9.6807,...,-12.4486,-2.1459,1.7996,8.457203e+04,4.401335e+04,6.830287e+04,1.8845,29.4801,39.1870,64.4385
ANH SJ Equity,0.2747,1.354406e+06,73124.2512,2.313239e+06,4.005763e+06,0.1697,357280.8529,5.0252,32.5985,4.9105,...,5.8489,5.9292,2.7585,1.096003e+06,1.427530e+06,1.692523e+06,4.0381,35.6369,45.7534,84.3433
BAW SJ Equity,0.8784,7.705000e+03,4142.0000,3.130300e+04,4.812200e+04,0.1198,6624.0000,4.1374,14.7108,4.1374,...,10.6581,9.6124,14.3336,4.502800e+04,1.184700e+04,1.681900e+04,1.7885,24.6187,41.3277,70.4382
BHG SJ Equity,0.5479,2.852730e+05,134871.0498,9.921380e+05,1.904628e+06,0.8369,486944.0718,32.0082,50.9114,24.3114,...,21.8961,20.3470,-17.3293,9.564535e+05,4.201441e+05,9.124902e+05,0.8155,22.0591,31.5273,46.0437
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
UGPA3 BS Equity,3.3751,1.098727e+04,2304.6800,2.422215e+04,3.825197e+04,0.2896,5720.7110,3.5200,4.5385,3.5200,...,13.8756,12.2053,-12.2436,1.260487e+05,1.329195e+04,1.402983e+04,2.3235,34.7484,48.6497,94.7407
USIM5 BS Equity,0.6896,5.855413e+03,167.9600,1.361231e+04,4.016175e+04,0.7852,1592.4070,2.3043,5.7616,2.3043,...,6.4728,6.5389,-14.8817,2.763835e+04,6.023373e+03,2.654944e+04,3.7826,14.9978,18.4920,22.6874
VALE3 BS Equity,0.4576,6.246400e+04,4940.0000,2.576590e+05,4.559840e+05,0.9927,85917.0000,14.8800,41.2931,14.6366,...,16.9916,23.7007,-8.1419,2.080660e+05,6.740400e+04,1.983250e+05,0.7845,14.7821,25.3657,33.9866
VAMO3 BS Equity,0.3247,1.083538e+04,881.6250,1.607353e+04,2.080883e+04,-0.2292,2668.1330,1.7422,43.8442,1.7422,...,11.6270,11.4471,23.8534,6.085482e+03,1.171701e+04,4.735295e+03,4.3915,56.3079,71.2180,247.4399


In [59]:
# Design matrix: only the ratio columns listed in `model_metrics`.
X = features[model_metrics]

In [62]:
# Regression target, looked up by company in the row order of `features` (and
# therefore of `X`). scikit-learn pairs X and y by position, not by index
# label, so taking the column in the sheet's own order could silently pair a
# company's ratios with another company's rating.
y = targets.loc[features.index, 'numeric_rating']

In [66]:
from sklearn.tree import DecisionTreeRegressor

In [108]:
# Rebinds `model` (previously the rule-based calculator) to a regression tree.
# `random_state` is fixed so repeated runs build the same tree.
model = DecisionTreeRegressor(ccp_alpha=0.01, min_weight_fraction_leaf=0.02, random_state=23)

In [109]:
# Fit on every available row; no data is held out at this point.
model.fit(X, y)

In [110]:
# Scored on the same rows the tree was fitted on, so this is an in-sample
# figure and not an estimate of out-of-sample performance.
model.score(X, y)

0.7779107494282219

In [70]:
# One importance per column of X, i.e. in the order of `model_metrics`.
model.feature_importances_

array([0.08920289, 0.0620057 , 0.37561832, 0.08883084, 0.28234072,
       0.10200153])

In [123]:
# Positional hold-out split: the first 28 rows form the test set and the
# remaining rows the training set. Both keep every feature column, and the
# targets are looked up by the index labels of each slice.
X_test, X_train = features.iloc[:28], features.iloc[28:]

y_test = targets.loc[X_test.index, "numeric_rating"]
y_train = targets.loc[X_train.index, "numeric_rating"]

In [124]:
from pycaret.regression import *

In [125]:
# Initialise a PyCaret regression experiment on the training slice, which
# holds every feature column. Preprocessing is switched off.
exp = setup(X_train, target=y_train, preprocess=False)

Unnamed: 0,Description,Value
0,Session id,5948
1,Target,numeric_rating
2,Target type,Regression
3,Original data shape,"(42, 27)"
4,Transformed data shape,"(42, 27)"
5,Transformed train set shape,"(29, 27)"
6,Transformed test set shape,"(13, 27)"
7,Numeric features,26
8,Rows with missing values,9.5%




In [126]:
# Let PyCaret evaluate its candidate regressors and keep the one it ranks first.
best = compare_models()

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
dt,Decision Tree Regressor,0.3333,0.5333,0.4613,0.0857,0.0633,0.0514,0.022
lightgbm,Light Gradient Boosting Machine,0.2536,0.2719,0.3452,-0.1303,0.0464,0.04,0.035
dummy,Dummy Regressor,0.2536,0.2719,0.3452,-0.1303,0.0464,0.04,0.015
catboost,CatBoost Regressor,0.3093,0.3278,0.4189,-0.1994,0.0566,0.0486,2.041
rf,Random Forest Regressor,0.3433,0.3421,0.4379,-0.2339,0.059,0.0536,0.166
xgboost,Extreme Gradient Boosting,0.4207,0.663,0.5593,-1.6554,0.0745,0.0679,0.057


  .applymap(highlight_cols, subset=["TT (Sec)"])


In [127]:
# Hyper-parameter search on the selected model; `best` is rebound to the result.
best = tune_model(best)

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,0.0,0.0,0.0,1.0,0.0,0.0
1,0.3333,0.3333,0.5774,0.0,0.0723,0.0513
2,0.0,0.0,0.0,1.0,0.0,0.0
3,0.1111,0.037,0.1925,0.8333,0.0289,0.0202
4,0.5,0.4167,0.6455,0.0,0.0917,0.0769
5,0.3333,0.3333,0.5774,-0.5,0.0826,0.0606
6,1.0,1.6667,1.291,-0.0714,0.1595,0.139
7,0.0,0.0,0.0,1.0,0.0,0.0
8,0.5833,0.5208,0.7217,-1.3437,0.1026,0.0991
9,0.25,0.125,0.3536,0.0,0.0488,0.0385


Fitting 10 folds for each of 10 candidates, totalling 100 fits


10 fits failed out of a total of 100.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
10 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\Thabang Ndhlovu\conda\Lib\site-packages\sklearn\model_selection\_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Thabang Ndhlovu\AppData\Roaming\Python\Python311\site-packages\pycaret\internal\pipeline.py", line 276, in fit
    fitted_estimator = self._memory_fit(
                       ^^^^^^^^^^^^^^^^^
  File "C:\Users\Thabang Ndhlovu\conda\Lib\site-packages\joblib\memory.py", line 353, in __call__
    return self.func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Thabang 

In [129]:
# Evaluate the tuned model on the held-out rows (the first 28 of `features`).
best.score(X_test, y_test)

-0.0023866348448684516

In [7]:
def my_utility_function(self, x, y):
    """Print the ``z`` attribute of *self*, then return ``x + y``.

    Although it is a plain function, the first argument is expected to be an
    object exposing a ``z`` attribute.
    """
    message = f"info in the class {self.z}"
    print(message)
    total = x + y
    return total

class MyClass:
    """Small demo class that hands itself to a module-level helper function."""

    def __init__(self, x, y):
        # z is a fixed value that the helper reads through the instance.
        self.x, self.y, self.z = x, y, 829

    def do_something(self):
        """Call the helper with this instance and its operands; print the sum."""
        print(my_utility_function(self, self.x, self.y))


In [8]:
# Demo instance with x = 2 and y = 2.
a = MyClass(2, 2)

In [9]:
# Prints the helper's message followed by x + y.
a.do_something()

info in the class 829
4
