# **Predictive Default Risk Assessor V.01**

# TODO

* Base model 
* Comparison
* Specialised
* For small entities - Examples?
* Backtest - All sectors 
* Understanding the model across all sectors/industries
* Any markets - consumer goods, industries
* UI last step after backtesting

In [94]:
import json

import numpy as np
import pandas as pd

from collections import namedtuple
from sklearn.metrics import mean_squared_error, root_mean_squared_error, accuracy_score, mean_absolute_percentage_error
from dataclasses import dataclass
from quantstats import * 
from hellocredit import *
from hellocredit.utils import get_rating_meta
from hellocredit import calculate_credit_rating, get_nested_dict
extend_pandas()

In [90]:
targets = pd.read_excel("dataset/target.xlsx", index_col=0)['numeric_rating']
jse_all_share = [stock for stock in targets.index if "SJ" in stock]
features = pd.read_excel("dataset/jalsh_dataset.xlsx", sheet_name="data", index_col=0, parse_dates=True, header=[0, 1])
features = features[jse_all_share]

In [113]:
X = features.mean().unstack()
y = targets.loc[X.index]

In [129]:
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.model_selection import train_test_split

In [130]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [131]:
model = HistGradientBoostingRegressor()
model.fit(X_train, y_train)
model.score(X_test, y_test)

-0.18569958847736556

count    24.000000
mean     13.883184
std      16.690738
min      -2.374057
25%       5.409520
50%       8.592150
75%      14.268756
max      76.656669
Name: EBITDA_TO_TOT_INT_EXP, dtype: float64

In [150]:
features = pd.read_excel("dataset/jalsh_dataset.xlsx", sheet_name="data", index_col=0, parse_dates=True, header=[0, 1])

In [152]:
X = features.mean().unstack()

In [188]:
X.describe()["RETURN_ON_ASSET"]

count    121.000000
mean       7.166755
std        5.727359
min       -2.835850
25%        2.456150
50%        6.490792
75%        9.984608
max       31.639859
Name: RETURN_ON_ASSET, dtype: float64

In [187]:
X.columns

Index(['ASSET_TO_EQY', 'BS_CASH_NEAR_CASH_ITEM', 'BS_INVENTORIES',
       'BS_OTHER_PPE_GROSS', 'BS_TOTAL_AVAIL_LINE_OF_CREDIT',
       'BS_TOTAL_LIABILITIES', 'BS_TOT_ASSET', 'CASH_TO_TOT_ASSET',
       'CFO_TO_TOT_DEBT', 'CF_FREE_CASH_FLOW', 'EBITDA_MARGIN',
       'EBITDA_TO_REVENUE', 'EBITDA_TO_TOT_INT_EXP', 'FCF_TO_TOTAL_DEBT',
       'IS_INT_EXPENSE', 'IS_NET_INTEREST_EXPENSE', 'RETURN_ON_ASSET',
       'SALES_GROWTH', 'SALES_TO_INVENT', 'SALES_TO_TOT_ASSET',
       'TANGIBLE_ASSETS', 'TOTAL_EQUITY', 'TOT_DEBT_TO_EBITDA',
       'TOT_DEBT_TO_TOT_ASSET', 'TOT_DEBT_TO_TOT_EQY',
       'TOTAL_DEBT_TO_TANGIBLE_ASSETS'],
      dtype='object', name='Dates')

# MODEL TRAINING

In [86]:
debt_to_ebitda

In [26]:
def calculate_loss(model_inputs):
    yhat = []
    for company in features.index:
        ratios = features.loc[company][model_metrics].to_dict()    
        model = CreditRatingCalculator(model_inputs)
        model.calculate_credit_rating(ratios)

        credit_score = model.credit_score
        credit_rating = model.credit_rating

        yhat.append(credit_score)

    y_true = targets['numeric_rating']
    yhat = np.round(yhat, 1)
    loss = mean_absolute_percentage_error(yhat, y_true)
    return loss

def normalize_weights(weights):
    total = sum(weights)
    return [weight / total for weight in weights]

def train_model(model_inputs, learning_rate=0.01, num_iterations=1000):
    
    np.random.seed(23)
    
    # Initialize weights and class_weights
    for category in model_inputs.values():
        category["class_weight"] = np.random.random()
        category["weights"] = np.random.random(len(category["weights"]))
        category["weights"] = normalize_weights(category["weights"])

    # Perform gradient descent
    for epoch in range(num_iterations):
        # Calculate gradients
        gradients = {}
        for category, category_data in model_inputs.items():
            gradients[category] = {
                "class_weight": 0.0,
                "weights": np.zeros_like(category_data["weights"])
            }

        # Calculate loss and gradients
        loss = calculate_loss(model_inputs)
        for category, category_data in model_inputs.items():
            # Calculate gradient for class_weight
            category_data["class_weight"] += 0.0001
            gradients[category]["class_weight"] = (calculate_loss(model_inputs) - loss) / 0.0001
            category_data["class_weight"] -= 0.0001

            # Calculate gradients for weights
            for i in range(len(category_data["weights"])):
                category_data["weights"][i] += 0.0001
                gradients[category]["weights"][i] = (calculate_loss(model_inputs) - loss) / 0.0001
                category_data["weights"][i] -= 0.0001

        # Update weights and class_weights
        for category, category_data in model_inputs.items():
            category_data["class_weight"] -= learning_rate * gradients[category]["class_weight"]
            category_data["weights"] -= learning_rate * gradients[category]["weights"]
            category_data["weights"] = normalize_weights(category_data["weights"])

        # Normalize class_weights
        class_weights = [category_data["class_weight"] for category_data in model_inputs.values()]
        normalized_class_weights = normalize_weights(class_weights)
        for category, weight in zip(model_inputs.keys(), normalized_class_weights):
            model_inputs[category]["class_weight"] = weight
        
        if epoch % 100 == 0:
            print(f"Epoch {epoch}: Loss = {loss:.4f}")
        
    return model_inputs

In [27]:
# Train the model
trained_model_inputs = train_model(model_inputs, learning_rate=0.1, num_iterations=300)

# Print the optimized weights and class_weights
for category, category_data in trained_model_inputs.items():
    print(f"Category: {category}")
    print(f"Class Weight: {category_data['class_weight']}")
    print(f"Weights: {category_data['weights']}")
    print()

Epoch 0: Loss = 0.2761
Epoch 100: Loss = 0.2593
Epoch 200: Loss = 0.2593
Category: profitability
Class Weight: 0.367213686010887
Weights: [1.0]

Category: leverage_coverage
Class Weight: 0.614510015986914
Weights: [0.2373745964555163, 0.18580497915793837, 0.5768204243865455]

Category: efficiency
Class Weight: 0.018276298002199015
Weights: [0.3883666310182689, 0.6116333689817312]



In [4]:
import numpy as np
from scipy.optimize import minimize

def calculate_loss(model_inputs):
    yhat = []
    for company in features.index:
        ratios = features.loc[company][model_metrics].to_dict()
        model = CreditRatingCalculator(model_inputs)
        model.calculate_credit_rating(ratios)
        credit_score = model.credit_score
        yhat.append(credit_score)
    y_true = targets['numeric_rating']
    yhat = np.round(yhat, 1)
    loss = mean_absolute_percentage_error(yhat, y_true)
    return loss

def normalize_weights(weights):
    total = sum(weights)
    return [weight / total for weight in weights]

def train_model(model_inputs, learning_rate=0.01, num_iterations=5000):
    np.random.seed(23)
    
    # Initialize weights and class_weights
    for category in model_inputs.values():
        category["class_weight"] = np.random.random()
        category["weights"] = np.random.random(len(category["weights"]))
        category["weights"] = normalize_weights(category["weights"])
    
    # Define the objective function for optimization
    def objective(params):
        idx = 0
        for category in model_inputs.values():
            category["class_weight"] = params[idx]
            idx += 1
            category["weights"] = params[idx:idx+len(category["weights"])]
            idx += len(category["weights"])
        return calculate_loss(model_inputs)
    
    # Define the bounds for optimization
    bounds = []
    for category in model_inputs.values():
        bounds.append((0, 1))  # Class weight bounds
        bounds.extend([(0, 1)] * len(category["weights"]))  # Weight bounds
    
    # Perform optimization using L-BFGS-B
    initial_params = []
    for category in model_inputs.values():
        initial_params.append(category["class_weight"])
        initial_params.extend(category["weights"])
    
    result = minimize(objective, initial_params, method='L-BFGS-B', bounds=bounds, options={'maxiter': num_iterations})
    print(result)
    
    # Update the optimized weights and class_weights
    optimized_params = result.x
    idx = 0
    for category in model_inputs.values():
        category["class_weight"] = optimized_params[idx]
        idx += 1
        category["weights"] = optimized_params[idx:idx+len(category["weights"])]
        idx += len(category["weights"])
    
    return model_inputs

In [5]:
# Train the model
trained_model_inputs = train_model(model_inputs, learning_rate=0.01, num_iterations=3000)

# Print the optimized weights and class_weights
for category, category_data in trained_model_inputs.items():
    print(f"Category: {category}")
    print(f"Class Weight: {category_data['class_weight']}")
    print(f"Weights: {category_data['weights']}")
    print()

NameError: name 'model_inputs' is not defined

In [None]:
def calculate_loss(model_inputs):
    yhat = []
    for company in features.index:
        ratios = features.loc[company][model_metrics].to_dict()
        model = CreditRatingCalculator(model_inputs)
        model.calculate_credit_rating(ratios)
        credit_score = model.credit_score
        yhat.append(credit_score)
    y_true = targets['numeric_rating']
    yhat = np.round(yhat, 1)
    loss = mean_absolute_percentage_error(yhat, y_true)
    return loss

In [28]:
calculate_loss(trained_model_inputs)

0.2592912997321396

In [14]:
def train_model(model_inputs, learning_rate=0.01, num_iterations=5000):
    np.random.seed(23)
    
    # Initialize weights and class_weights
    for category in model_inputs.values():
        category["class_weight"] = np.random.random()
        category["weights"] = np.random.random(len(category["weights"]))
        category["weights"] = normalize_weights(category["weights"])
    
    # Define the objective function for optimization
    def objective(params):
        idx = 0
        for category in model_inputs.values():
            category["class_weight"] = params[idx]
            idx += 1
            category["weights"] = params[idx:idx+len(category["weights"])]
            idx += len(category["weights"])
        return calculate_loss(model_inputs)
    
    # Define the bounds for optimization
    bounds = []
    for category in model_inputs.values():
        bounds.append((0, 1))  # Class weight bounds
        bounds.extend([(0, 1)] * len(category["weights"]))  # Weight bounds
    
    # Perform optimization using L-BFGS-B
    initial_params = []
    for category in model_inputs.values():
        initial_params.append(category["class_weight"])
        initial_params.extend(category["weights"])
    
    result = minimize(objective, initial_params, method='L-BFGS-B', bounds=bounds, options={'maxiter': num_iterations})
    
    # Update the optimized weights and class_weights
    optimized_params = result.x
    idx = 0
    for category in model_inputs.values():
        category["class_weight"] = optimized_params[idx]
        idx += 1
        category["weights"] = optimized_params[idx:idx+len(category["weights"])]
        idx += len(category["weights"])
    
    # Calculate and print the final error
    final_error = calculate_loss(model_inputs)
    print(f"Final Error: {final_error:.4f}")
    
    return model_inputs

In [110]:
def calculate_loss(model_inputs):
    yhat = []
    for company in features.index:
        ratios = features.loc[company][model_metrics].to_dict()    
        model = CreditRatingCalculator(model_inputs)
        model.calculate_credit_rating(ratios)

        credit_score = model.credit_score
        credit_rating = model.credit_rating

        yhat.append(credit_score)

    y_true = targets['numeric_rating']
    yhat = np.round(yhat, 1)
    loss = mean_absolute_percentage_error(yhat, y_true)
    print(y_true.values)
    print(yhat)
    return loss

In [111]:
calculate_loss(trained_model_inputs)

[5.5 5.5 4.5 6.5 4.5 5.5 6.5 4.5 6.5 6.5 7.5 5.5 5.5 6.5 5.5 5.5 6.5 6.5
 5.5 6.5 5.5 6.5 5.5 6.5 6.5 6.5 6.5 6.5 5.5 8.5 6.5 6.5 6.5 6.5 6.5 6.5
 6.5 6.5 6.5 6.5 6.5 6.5 6.5 6.5 6.5 6.5 5.5 6.5 5.5 6.5 6.5 6.5 6.5 6.5
 6.5 6.5 6.5 5.5 6.5 6.5 7.5 6.5 6.5 5.5 5.5 6.5 6.5 5.5 6.5 6.5]
[9.1 3.7 6.9 9.3 2.6 3.5 9.4 5.8 6.8 0.2 3.5 3.7 4.  0.2 3.5 9.2 0.2 7.
 0.2 3.8 3.5 0.2 3.6 9.3 9.2 9.2 9.2 3.7 7.  9.3 0.2 5.7 9.  9.  9.4 2.4
 3.8 3.8 4.6 8.1 8.  9.2 4.6 4.6 9.1 6.9 9.3 4.7 4.  6.9 9.1 3.8 3.7 3.7
 3.7 0.3 4.6 4.1 9.2 6.9 6.9 9.  6.9 4.7 2.5 4.2 3.7 4.7 4.7 4.2]


3.305207936385858

In [141]:
def get_buckets(min_val, max_val, lower_is_better=False, num_buckets=9):
    """
    Generates optimized buckets based on min, max values, desired number of buckets, and whether lower values are better.

    Args:
        min_val (float): The minimum value.
        max_val (float): The maximum value.
        num_buckets (int, optional): Number of buckets. Defaults to 9.
        lower_is_better (bool, optional): True if lower values are better, else False. Defaults to False.

    Returns:
        list: List of tuples (start, end) representing each bucket's range.
    """
    min_val, max_val = (max_val, min_val) if lower_is_better else (min_val, max_val)
    interval = (max_val - min_val) / (num_buckets - 1)
    buckets = [(round(min_val + i * interval, 2), round(min_val + (i + 1) * interval, 2)) for i in range(num_buckets - 1)]
    buckets.append((round(max_val - interval, 2), max_val))
    return list(reversed(buckets)) if lower_is_better else buckets


# Redefine the values and number of buckets for clarity
min_val = -50
max_val = 50

# Generate the optimized buckets
buckets_list = get_buckets(min_val,  max_val)
print(buckets_list)

[(-50.0, -37.5), (-37.5, -25.0), (-25.0, -12.5), (-12.5, 0.0), (0.0, 12.5), (12.5, 25.0), (25.0, 37.5), (37.5, 50.0), (37.5, 50)]


In [144]:
max_val = features[model_metrics].max()
min_val = features[model_metrics].min()

In [155]:
for metric in model_metrics:
    buckets = get_buckets(min_val.loc[metric].round(0), max_val.loc[metric].round(0))
    print(metric, buckets)

oper_margin [(-58.0, -42.75), (-42.75, -27.5), (-27.5, -12.25), (-12.25, 3.0), (3.0, 18.25), (18.25, 33.5), (33.5, 48.75), (48.75, 64.0), (48.75, 64.0)]
tot_debt_to_tot_eqy [(4.0, 412.62), (412.62, 821.25), (821.25, 1229.88), (1229.88, 1638.5), (1638.5, 2047.12), (2047.12, 2455.75), (2455.75, 2864.38), (2864.38, 3273.0), (2864.38, 3273.0)]
tot_debt_to_ebitda [(0.0, 7.62), (7.62, 15.25), (15.25, 22.88), (22.88, 30.5), (30.5, 38.12), (38.12, 45.75), (45.75, 53.38), (53.38, 61.0), (53.38, 61.0)]
ebitda_to_tot_int_exp [(-1.0, 2.12), (2.12, 5.25), (5.25, 8.38), (8.38, 11.5), (11.5, 14.62), (14.62, 17.75), (17.75, 20.88), (20.88, 24.0), (20.88, 24.0)]
return_on_asset [(-24.0, -18.38), (-18.38, -12.75), (-12.75, -7.12), (-7.12, -1.5), (-1.5, 4.12), (4.12, 9.75), (9.75, 15.38), (15.38, 21.0), (15.38, 21.0)]
asset_turnover [(0.0, 0.5), (0.5, 1.0), (1.0, 1.5), (1.5, 2.0), (2.0, 2.5), (2.5, 3.0), (3.0, 3.5), (3.5, 4.0), (3.5, 4.0)]
