# **Predictive Default Risk Assessor**

In [79]:
model_inputs = {
    "profitability": {
        "class_weight": 0.30,
        "weights": [1.0], 
        "metrics": {
            "oper_margin": {
                "lower_is_better": False,
                "thresholds": [
                    (40, float("inf")),
                    (35, 39),
                    (30, 34),
                    (25, 29),
                    (20, 24),
                    (15, 19),
                    (10, 14),
                    (5, 9),
                    (float("-inf"), 0)
                ],
            }
        },
    },
    "leverage_coverage": {
        "class_weight": 0.55,
        "weights": [0.4, 0.3, 0.3],
        "metrics": {
            "tot_debt_to_tot_eqy": {
                "lower_is_better": True,
                "thresholds": [
                     (float("-inf"), 2.0),
                     (2.0, 16.0),
                     (16.0, 24.0),
                     (24.0, 33.0),
                     (33.0, 43.0),
                     (43.0, 54.0),
                     (54.0, 68.0),
                     (68.0, 94.0),
                     (94.0, float("inf")),
                ],
            },
            "tot_debt_to_ebitda": {
                "lower_is_better": True,
                "thresholds": [
                    (float("-inf"), 0.09),
                    (0.09, 0.49),
                    (0.49, 0.9),
                    (0.9, 1.36),
                    (1.36, 1.68),
                    (1.68, 2.26),
                    (2.26, 3.27),
                    (3.27, 4.4),
                    (4.4, float("inf")),
                ],
            },
            "ebitda_to_tot_int_exp": {
                "lower_is_better": False,
                "thresholds": [
                    (25, float("inf")),
                    (20, 25),
                    (15, 20),
                    (10, 15),
                    (5, 10),
                    (3, 5),
                    (1, 3),
                    (0, 1),
                    (float("-inf"), 0),
                ],
            },
        },
    },
    "efficiency": {
        "class_weight": 0.15,
        "weights": [0.5, 0.5],
        "metrics": {
            "return_on_asset": {
                "lower_is_better": False,
                "thresholds": [
                    (0.15, float("inf")),
                    (0.10, 0.15),
                    (0.08, 0.10),
                    (0.06, 0.08),
                    (0.04, 0.06),
                    (0.02, 0.04),
                    (0.00, 0.02),
                    (-0.02, 0.00),
                    (float("-inf"), -0.02)
                ],
            },
            "asset_turnover": {
                "lower_is_better": False,
                "thresholds": [
                    (4.0, float("inf")),
                    (3.0, 4.0),
                    (2.0, 3.0),
                    (1.5, 2.0),
                    (1.0, 1.5),
                    (0.75, 1.0),
                    (0.5, 0.75),
                    (0.25, 0.5),
                    (float("-inf"), 0.25)
                ],
            },
        },
    },
}





class CreditRatingCalculator:
    def __init__(self, metrics):
        self.metrics = metrics
        self.credit_rating_dict = {
            'Aaa': 2.5,
            'Aa': 3.5,
            'A': 4.5,
            'Baa': 5.5,
            'Ba': 6.5,
            'B': 7.5,
            'Caa': 8.5,
            'Ca': 9.5,
            'C': 10,
        }
        
    def _calculate_metric_score(self, metric, thresholds, inverse):
        for score, (lower, upper) in enumerate(thresholds, start=1):
            if (inverse and metric <= upper) or (not inverse and metric >= lower):
                return score
        return len(thresholds) // 2 # else return the middle score

    def _calculate_category_score(self, category_metrics, ratios):
        total_weighted_score = 0

        for metric, weight in zip(
            category_metrics["metrics"].items(), category_metrics["weights"]
        ):
            metric_name, metric_data = metric
            value = ratios[metric_name]
            score = self._calculate_metric_score(
                value, metric_data["thresholds"], metric_data["lower_is_better"]
            )
            total_weighted_score += score * weight

        return total_weighted_score

    def _calculate_scores(self, ratios):
        scores = {}
        for category, category_data in self.metrics.items():
            category_score = self._calculate_category_score(category_data, ratios)
            scores[category] = category_score
        return scores

    def _calculate_weighted_score(self, scores):
        weights = {
            category: category_data["class_weight"]
            for category, category_data in self.metrics.items()
        }
        return sum(scores[category] * weight for category, weight in weights.items())
            
    def _determine_credit_rating(self, weighted_score):
        sorted_credit_ratings = sorted(self.credit_rating_dict.items(), key=lambda item: item[1])
        for rating, threshold in sorted_credit_ratings:
            if weighted_score <= threshold:
                return rating
        return "N/R" 

    def calculate_credit_rating(self, ratios):
        self.scores = self._calculate_scores(ratios)
        self.credit_score = self._calculate_weighted_score(self.scores)
        self.credit_rating = self._determine_credit_rating(self.credit_score)

In [80]:
import pandas as pd
import numpy as np

In [81]:
# df = pd.read_csv("research/JALSH Index_dataset_2000_2024_clean.csv", index_col=0, header=[0, 1])
# classfier = pd.read_excel("research/classification_data.xlsx", index_col=0)
metrics = pd.read_excel("research/metrics_full.xlsx", index_col=0)

In [82]:
company = "VOD SJ Equity"

In [83]:
model_metrics = [
    "oper_margin", 
    "tot_debt_to_tot_eqy", 
    "tot_debt_to_ebitda", 
    "ebitda_to_tot_int_exp", 
    "return_on_asset",
    "asset_turnover",
]

ratios = metrics[company].loc[model_metrics].to_dict()

In [84]:
model = CreditRatingCalculator(model_inputs)
model.calculate_credit_rating(ratios)
print(f"Model Inputs:")
display(ratios)
print("")
print(f"Class Scoring: {model.scores}")
print(f"Credit Score: {model.credit_score}")
print(f"Credit Rating: {model.credit_rating}")

Model Inputs:


{'oper_margin': 26.21094703807191,
 'tot_debt_to_tot_eqy': 65.00493973766227,
 'tot_debt_to_ebitda': 0.7931121155634536,
 'ebitda_to_tot_int_exp': 13.35518524144715,
 'return_on_asset': 16.14384135791923,
 'asset_turnover': 1.076701663147559}


Class Scoring: {'profitability': 4.0, 'leverage_coverage': 4.9, 'efficiency': 3.0}
Credit Score: 4.345000000000001
Credit Rating: A


In [10]:
df = pd.read_excel("resources/stock_universe_default_prob.xlsx", index_col=0)
ratings = df.sort_values(["bb_1yr_default_prob"], ascending=False)

In [87]:
ratios = [metrics[c].loc[model_metrics].to_dict() for c in metrics.columns] 

In [94]:
X = metrics.T

In [96]:
y = y.to_frame()

In [114]:
X = X.replace(0, np.nan)

In [125]:
X = X[model_metrics]

In [126]:
from pycaret.regression import *

In [127]:
model = setup(X.join(y), target="numerical_rating")

Unnamed: 0,Description,Value
0,Session id,1307
1,Target,numerical_rating
2,Target type,Regression
3,Original data shape,"(124, 7)"
4,Transformed data shape,"(124, 7)"
5,Transformed train set shape,"(86, 7)"
6,Transformed test set shape,"(38, 7)"
7,Numeric features,6
8,Rows with missing values,14.5%
9,Preprocess,True




In [128]:
best = compare_models()

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
catboost,CatBoost Regressor,0.7566,1.0414,0.9742,0.1075,0.2095,0.2273,1.62
et,Extra Trees Regressor,0.7695,1.0881,0.9841,0.0731,0.2107,0.2316,0.079
knn,K Neighbors Regressor,0.8258,1.0975,1.0071,0.0124,0.218,0.2513,0.023
rf,Random Forest Regressor,0.8268,1.1349,1.0194,0.0069,0.2203,0.2527,0.111
ada,AdaBoost Regressor,0.8887,1.1508,1.0387,-0.0377,0.227,0.2804,0.056
omp,Orthogonal Matching Pursuit,0.9436,1.2094,1.0761,-0.0965,0.2325,0.2857,0.013
lightgbm,Light Gradient Boosting Machine,0.8774,1.2266,1.0704,-0.1074,0.2319,0.2642,0.062
dummy,Dummy Regressor,0.961,1.2466,1.0927,-0.1278,0.236,0.2902,0.024
br,Bayesian Ridge,0.9467,1.2693,1.0968,-0.1812,0.2378,0.2898,0.022
lasso,Lasso Regression,0.9394,1.267,1.0923,-0.1892,0.2369,0.2878,0.013


  .applymap(highlight_cols, subset=["TT (Sec)"])


In [129]:
best

<catboost.core.CatBoostRegressor at 0x23199373ed0>

In [130]:
best.score(model.X_test_transformed, model.y_test_transformed)

-0.18830834710825806

In [131]:
best.feature_importances_

array([15.68425982, 18.29144601, 14.47126786, 17.04432959, 24.17062741,
       10.33806931])

In [None]:
{'oper_margin': 26.21094703807191,
 'tot_debt_to_tot_eqy': 65.00493973766227,
 'tot_debt_to_ebitda': 0.7931121155634536,
 'ebitda_to_tot_int_exp': 13.35518524144715,
 'return_on_asset': 16.14384135791923,
 'asset_turnover': 1.076701663147559}

In [32]:
df = pd.read_excel("resources/stock_universe_default_prob.xlsx", index_col=0)
ratings = df.sort_values(["bb_1yr_default_prob"], ascending=False)

ratings_dict = {}
for company in ratings.index:
    try:
        ratios = metrics[company].loc[model_metrics].to_dict()
        model = CreditRatingCalculator(model_inputs)
        model.calculate_credit_rating(ratios)
        ratings_dict[company] = {"Model": model.credit_rating, "Bloomberg": ratings.loc[company]["rsk_bb_issuer_default"]}
    except: pass

In [33]:
data = pd.DataFrame(ratings_dict).T

In [36]:
import json

In [37]:
with open("file.json", "r") as f:
    file = json.load(f)

In [44]:
mappings = file['credit_rating_mappings']

In [50]:
scl = file['credit_rating_scale']

In [46]:

reverse_mappings = {value: key for key, values in mappings.items() for value in values}

# Map the values in the 'rating' column using the reverse mapping dictionary
data['rating'] = data['Bloomberg'].map(lambda x: reverse_mappings.get(x, x))

In [54]:
data['numerical_rating'] = data['rating'].map(scl)

In [59]:
expected_ratings = data['numerical_rating']

In [73]:
optimized_metrics = update_metrics(metrics, params)


KeyError: 'class_weights'

In [11]:
optimized_metrics

{'efficiency': {'class_weight': 0.15,
  'weights': [0.5, 0.5],
  'metrics': {'return_on_asset': {'lower_is_better': False,
    'thresholds': [(0.15, inf), (0.1, 0.15), Ellipsis, (-inf, -0.02)]},
   'asset_turnover': {'lower_is_better': False,
    'thresholds': [(4.0, inf), (3.0, 4.0), Ellipsis, (-inf, 0.25)]}}}}

In [14]:
# Initial parameters
params = {
    "class_weights": {"efficiency": 0.15},
    "thresholds": {
        "efficiency": {
            "return_on_asset": [(0.15, float("inf")), (0.10, 0.15), ..., (float("-inf"), -0.02)],
            "asset_turnover": [(4.0, float("inf")), (3.0, 4.0), ..., (float("-inf"), 0.25)]
        }
    }
}

In [9]:
def update_metrics(metrics, params):
    updated_metrics = metrics.copy()
    
    # Update class weights
    for category, weight in params["class_weights"].items():
        updated_metrics[category]["class_weight"] = weight
    
    # Update thresholds
    for category, category_data in metrics.items():
        for metric, metric_data in category_data["metrics"].items():
            updated_metrics[category]["metrics"][metric]["thresholds"] = params["thresholds"][category][metric]
    
    return updated_metrics

In [18]:
def update_weights_and_class_weights(data_dict, category, new_class_weight, new_weights):
    if category in data_dict:
        if 'class_weight' in data_dict[category]:
            data_dict[category]['class_weight'] = new_class_weight
        if 'weights' in data_dict[category]:
            data_dict[category]['weights'] = new_weights
    else:
        print(f"Category {category} not found in data dictionary.")

In [19]:
class ModelDataModifier:
    def __init__(self, data_dict):
        """Initialize with the model data dictionary."""
        self.data = data_dict

    def update_from_dict(self, update_dict):
        for category, updates in update_dict.items():
            if category not in self.data:
                print(f"Category '{category}' not found.")
                continue

            class_weight = updates.get('class_weight')
            if class_weight is not None:
                if isinstance(class_weight, float):
                    self.data[category]['class_weight'] = class_weight
                else:
                    print(f"Invalid class_weight type for '{category}'; must be float.")

            weights = updates.get('weights')
            if weights is not None:
                if all(isinstance(w, float) for w in weights):
                    self.data[category]['weights'] = weights
                else:
                    print(f"Invalid weights for '{category}'; each weight must be a float.")

In [27]:
modifier = ModelDataModifier(model_inputs)
updates = {
    'profitability': {'class_weight': 0.35, 'weights': [1.0]},
    'leverage_coverage': {'weights': [0.5, 0.25, 0.25]},
    'efficiency': {'class_weight': 0.20, 'weights': [0.6, 0.4]}
}

# Perform updates
modifier.update_from_dict(updates)