# **Predictive Default Risk Assessor V.01**

# TODO

* Base model 
* Comparison
* Specialised
* For small entities - Examples?
* Backtest - All sectors 
* Understanding the model across all sectors/industries
* Any markets - consumer goods, industries
* UI last step after backtesting

In [10]:
import json

import numpy as np
import pandas as pd

from collections import namedtuple
from sklearn.metrics import mean_squared_error, root_mean_squared_error, accuracy_score, mean_absolute_percentage_error
from dataclasses import dataclass
from quantstats import * 
extend_pandas()

In [2]:
features = pd.read_excel("dataset/features.xlsx", index_col=0)
targets = pd.read_excel("dataset/target.xlsx", index_col=0)
features.columns = features.columns.str.lower()

In [3]:
leverage_coverage_metrics = {
    'class_weight': 20,
    'metric_weights': {
        'debt_to_equity': 0.2,
        'debt_to_ebitda': 0.2,
        'ebitda_to_interest_expense': 0.2,
        'debt_to_tangible_assets': 0.4
    },
    'metrics': {
        'debt_to_equity': {
            'lower_is_better':True,
            'thresholds':[
                (float('-inf'), 0.25),
                (0.25, 0.50),
                (0.50, 0.75),
                (0.75, 1.00),
                (1.00, 1.50),
                (1.50, 2.00),
                (2.00, 3.00),
                (3.00, 5.00),
                (5.00, float('inf'))
            ]
        },
        'debt_to_ebitda': {
            'lower_is_better':True,
            'thresholds':[
                (float("-inf"), 0.5),
                (0.5, 1.0),
                (1.0, 2.0),
                (2.0, 3.0),
                (3.0, 4.5),
                (4.5, 6.5),
                (6.5, 9.0),
                (6.5, 9.0),
                (9.0, float("inf"))
            ]
        },
        'ebitda_to_interest_expense': {
            'lower_is_better':False,
            'thresholds':[
                (25.0, float("inf")),
                (15.0, 25.0),
                (10.0, 15.0),
                (6.0, 10.0),
                (3.0, 6.0),
                (1.0, 3.0),
                (0.0, 1.0),
                (0.0, 1.0),
                (float("-inf"), 0.0)
            ]
        },
        'debt_to_tangible_assets': {
            'lower_is_better':True,
            'thresholds':[
                (float('-inf'), 0.20),
                (0.20, 0.40),
                (0.40, 0.60),
                (0.60, 0.80),
                (0.80, 1.00),
                (1.00, 1.20),
                (1.20, 1.50),
                (1.50, 2.00),
                (2.00, float('inf'))
            ]
        }
    }
}

efficiency_metrics = {
    'class_weight': 15,
    'metric_weights': {
        'asset_turnover': 0.4,
        'inventory_to_cost_of_sales': 0.3,
        'cash_to_assets': 0.3
    },
    'metrics': {
        'asset_turnover': {
            'lower_is_better':False,
            'thresholds':[
                (5.00, float('inf')),
                (4.00, 5.00),
                (3.00, 4.00),
                (2.50, 3.00),
                (2.00, 2.50),
                (1.50, 2.00),
                (1.00, 1.50),
                (0.50, 1.00),
                (float('-inf'), 0.50)
            ]
        },
        'inventory_to_cost_of_sales': {
            'lower_is_better':True,
            'thresholds':[
                (float('-inf'), 0.10),
                (0.10, 0.20),
                (0.20, 0.30),
                (0.30, 0.40),
                (0.40, 0.50),
                (0.50, 0.60),
                (0.60, 0.80),
                (0.80, 1.00),
                (1.00, float('inf'))
            ]
        },
        'cash_to_assets': {
            'lower_is_better':False,
            'thresholds':[
                (0.50, float('inf')),
                (0.40, 0.50),
                (0.30, 0.40),
                (0.25, 0.30),
                (0.20, 0.25),
                (0.15, 0.20),
                (0.10, 0.15),
                (0.05, 0.10),
                (float('-inf'), 0.05)
            ]
        }
    }
}

profitability_metrics = {
    'class_weight': 25,
    'metric_weights': {
        'ebitda_margin': 0.4,
        'total_assets': 0.3,
        'sales_growth': 0.3
    },
    'metrics': {
        'ebitda_margin': {
            'lower_is_better':False,
            'thresholds':[
                (40.0, float('inf')), 
                (35.0, 40.0), 
                (30.0, 35.0),
                (25.0, 30.0), 
                (20.0, 25.0), 
                (15.0, 20.0),
                (10.0, 15.0), 
                (5.0, 10.0), 
                (float('-inf'), 5.0)
            ]
        },
        'total_assets': {
            'lower_is_better':False,
            'thresholds':[
                (500_000_000, float('inf')), 
                (100_000_000, 500_000_000), 
                (50_000_000, 100_000_000),
                (10_000_000, 50_000_000), 
                (5_000_000, 10_000_000), 
                (1_000_000, 5_000_000),
                (500_000, 1_000_000), 
                (100_000, 500_000), 
                (float('-inf'), 100_000)
            ]
        },
        'sales_growth': {
            'lower_is_better':False,
            'thresholds':[
                (40.0, float('inf')), 
                (30.0, 40.0), 
                (25.0, 30.0),
                (20.0, 25.0), 
                (15.0, 20.0), 
                (10.0, 15.0),
                (5.0, 10.0), 
                (0.0, 5.0), 
                (float('-inf'), 0.0)
            ]
        }
    }
}

In [4]:
financial_metrics = {
    'profitability_metrics':profitability_metrics,
    'leverage_coverage_metrics': leverage_coverage_metrics,
    'efficiency_metrics': efficiency_metrics
}

In [5]:
class CreditRatingCalculator:
    def __init__(self, metrics):
        self.metrics = metrics
        self.calculation_details = {"metrics": {}}

    def _calculate_metric_score(self, value, thresholds, inverse):
        for score, (lower, upper) in enumerate(thresholds, start=1):
            if inverse and value <= upper or not inverse and value >= lower:
                return score
        return len(thresholds) // 2

    def _determine_credit_rating(self, score):
        credit_ratings = [
            ("Aaa", 2.5),
            ("Aa", 3.5),
            ("A", 4.5),
            ("Baa", 5.5),
            ("Ba", 6.5),
            ("B", 7.5),
            ("Caa", 8.5),
            ("Ca", 9.5),
            ("C", float("inf")),
        ]
        for rating, threshold in credit_ratings:
            if score <= threshold:
                return rating

    def _calculate_category_score(self, category, category_data):
        category_ratios = self.ratios[category]
        metric_weights = category_data["metric_weights"]
        total_weighted_score = 0

        for metric_name, metric_data in category_data["metrics"].items():
            value = category_ratios[metric_name]
            score = self._calculate_metric_score(value, metric_data["thresholds"], metric_data["lower_is_better"])
            weight = metric_weights[metric_name]
            weighted_score = score * weight
            rating = self._determine_credit_rating(score)
            total_weighted_score += weighted_score

            self.calculation_details["metrics"][metric_name] = {
                "category": category,
                "value": value,
                "score": score,
                "weight": weight,
                "weighted_score": weighted_score,
                "rating": rating,
            }

        return total_weighted_score

    def _calculate_scores(self):
        return {category: self._calculate_category_score(category, category_data)
                for category, category_data in self.metrics.items()}

    def _calculate_weighted_score(self, scores):
        total_weighted_score = sum(
            scores[category] * category_data["class_weight"]
            for category, category_data in self.metrics.items()
        )
        total_weight = sum(category_data["class_weight"] for category_data in self.metrics.values())
        return total_weighted_score / total_weight

    def calculate_credit_rating(self, ratios):
        self.ratios = ratios
        scores = self._calculate_scores()
        credit_score = self._calculate_weighted_score(scores)
        credit_rating = self._determine_credit_rating(credit_score)

        self.calculation_details.update({
            "scores": scores,
            "credit_score": credit_score,
            "credit_rating": credit_rating,
        })

In [6]:
def get_expected_metrics(data):
    return {
        category: {metric: sum(values) / len(values) 
        for metric, values in metrics.items()}
        for category, metrics in data.items()
    }

def get_nested_dict(data):
    nested_dict = {}

    for (category, metric), values in data.iterrows():
        if category not in nested_dict:
            nested_dict[category] = {}
        nested_dict[category][metric] = values.tolist()

    return nested_dict

def get_period_metrics(data):
    n = len(data['leverage_coverage_metrics']['debt_to_equity'])
    return {
        i: {
            category: {metric: values[i] 
            for metric, values in metrics.items()}
            for category, metrics in data.items()
        }
        for i in range(n)
    }


def bayesian_ridge_model(metrics, periods=1, max_iter=300, tol=1e-3):
    import numpy as np
    from sklearn.linear_model import BayesianRidge

    predictions = {}
    for metric_group, values_dict in metrics.items():
        predictions[metric_group] = {}
        for metric, values in values_dict.items():
            X = np.arange(len(values)).reshape(-1, 1)
            
            model = BayesianRidge(max_iter=max_iter, tol=tol).fit(X, values)
            
            next_periods = np.arange(len(values), len(values) + periods).reshape(-1, 1)
            predictions[metric_group][metric] = model.predict(next_periods).tolist() if periods > 0 else list(values)
    return predictions

In [7]:
# Define the data
data = { 
    ('leverage_coverage_metrics', 'debt_to_equity'): [1.2, 1.0, 1.3, 1.2], 
    ('leverage_coverage_metrics', 'debt_to_ebitda'): [3.5, 3.6, 3.4, 3.5], 
    ('leverage_coverage_metrics', 'ebitda_to_interest_expense'): [8.0, 7.5, 8.2, 8.0], 
    ('leverage_coverage_metrics', 'debt_to_tangible_assets'): [0.6, 0.7, 0.5, 0.6], 
    ('efficiency_metrics', 'asset_turnover'): [1.8, 1.7, 1.9, 1.8], 
    ('efficiency_metrics', 'inventory_to_cost_of_sales'): [0.4, 0.5, 0.3, 0.4], 
    ('efficiency_metrics', 'cash_to_assets'): [0.2, 0.25, 0.15, 0.2], 
    ('profitability_metrics', 'ebitda_margin'): [18.0, 19.0, 17.5, 18.0], 
    ('profitability_metrics', 'total_assets'): [50000000, 52000000, 51000000, 50000000], 
    ('profitability_metrics', 'sales_growth'): [12.0, 13.0, 11.5, 12.0],
}

# Create a multi-index DataFrame
index = pd.MultiIndex.from_tuples(data.keys(), names=['Metric Category', 'Metric Name'])
df = pd.DataFrame(data.values(), index=index, columns=['Q1', 'Q2', 'Q3', 'Q4'])

In [8]:
rating_description_dict = {
    "Aaa": "Issuers assessed Aaa are judged to have the highest intrinsic, or standalone, financial strength, and thus subject to the lowest level of credit risk absent any possibility of extraordinary support from an affiliate or a government.",
    "Aa": "Issuers assessed Aa are judged to have high intrinsic, or standalone, financial strength, and thus subject to very low credit risk absent any possibility of extraordinary support from an affiliate or a government.",
    "A": "Issuers assessed A are judged to have upper-medium-grade intrinsic, or standalone, financial strength, and thus subject to low credit risk absent any possibility of extraordinary support from an affiliate or a government.",
    "Baa": "Issuers assessed Baa are judged to have medium-grade intrinsic, or standalone, financial strength, and thus subject to moderate credit risk and, as such, may possess certain speculative credit elements absent any possibility of extraordinary support from an affiliate or a government.",
    "Ba": "Issuers assessed Ba are judged to have speculative intrinsic, or standalone, financial strength, and are subject to substantial credit risk absent any possibility of extraordinary support from an affiliate or a government.",
    "B": "Issuers assessed B are judged to have speculative intrinsic, or standalone, financial strength, and are subject to high credit risk absent any possibility of extraordinary support from an affiliate or a government.",
    "Caa": "Issuers assessed Caa are judged to have speculative intrinsic, or standalone, financial strength, and are subject to very high credit risk absent any possibility of extraordinary support from an affiliate or a government.",
    "Ca": "Issuers assessed Ca have highly speculative intrinsic, or standalone, financial strength, and are likely to be either in, or very near, default, with some prospect for recovery of principal and interest; or, these issuers have avoided default or are expected to avoid default through the provision of extraordinary support from an affiliate or a government.",
    "C": "Issuers assessed C are typically in default, with little prospect for recovery of principal or interest; or, these issuers are benefiting from a government or affiliate support but are likely to be liquidated over time; without support there would be little prospect for recovery of principal or interest."
}

In [26]:
@dataclass
class HelloCredit:
    file_path: str = None
    metrics_path: str = None
    
    def __post_init__(self):        
        self.file_path = self.file_path or "data.xlsx"
        self.metrics_path = self.metrics_path or "metrics.json"
        
        with open(self.metrics_path, "r") as f:
            self.metrics = json.load(f)
            
        self.class_weights = {m: self.metrics[m]["class_weight"] for m in self.metrics}
        self.dataframe = pd.read_excel(self.file_path, index_col=[0, 1])
        self.nested_dict = get_nested_dict(self.dataframe)
        self.company_period_metrics = get_period_metrics(self.nested_dict)
        self.company_expected_metrics = get_expected_metrics(self.nested_dict)
        self.input_dict = {
            "company_name": None,
            "calculator_model": {"sector": "Corporates", "size": "Small"},
            "factor_weights_model": self.class_weights,
            "probabilistic_model": {"periods": 1, "max_iter": 300, "tol": 1e-3}
        }

    
    
    def update_input_dict(self, input_dict: dict) -> dict:  
        def deep_update(d, u):
            for k, v in u.items():
                if isinstance(v, dict):
                    d[k] = deep_update(d.get(k, {}), v)
                else:
                    d[k] = v
            return d

        self.input_dict = deep_update(self.input_dict, input_dict)
    
    
    def update_output_dict(self):
        calculator = CreditRatingCalculator(self.metrics)
        calculator.calculate_credit_rating(self.company_expected_metrics)
        calculator_output = calculator.calculation_details

        # Single Period Calcs
        calculator_periods_output = {}
        for period in self.company_period_metrics:
            calculator = CreditRatingCalculator(self.metrics)
            calculator.calculate_credit_rating(self.company_period_metrics[period])
            calculator_periods_output[period] = calculator.calculation_details

        # Bayesian Model
        bayesian_model_output = bayesian_ridge_model(self.nested_dict, **self.input_dict["probabilistic_model"])

        # Rating Description
        rating_description = rating_description_dict[calculator_output["credit_rating"]]

        output_dict = {
            "company_name": self.input_dict["company_name"],
            "rating_description": rating_description,
            "company_expected_metrics": self.company_expected_metrics,
            "company_period_metrics": self.company_period_metrics,
            "calculator_output": calculator_output,
            "calculator_periods_output": calculator_periods_output,
            "bayesian_model_output": bayesian_model_output,
            "metrics": self.metrics
        }
        return output_dict

In [27]:
m = HelloCredit()

In [29]:
m.update_input_dict({"probabilistic_model": {"periods": 10}})

# MODEL TRAINING

In [26]:
def calculate_loss(model_inputs):
    yhat = []
    for company in features.index:
        ratios = features.loc[company][model_metrics].to_dict()    
        model = CreditRatingCalculator(model_inputs)
        model.calculate_credit_rating(ratios)

        credit_score = model.credit_score
        credit_rating = model.credit_rating

        yhat.append(credit_score)

    y_true = targets['numeric_rating']
    yhat = np.round(yhat, 1)
    loss = mean_absolute_percentage_error(yhat, y_true)
    return loss

def normalize_weights(weights):
    total = sum(weights)
    return [weight / total for weight in weights]

def train_model(model_inputs, learning_rate=0.01, num_iterations=1000):
    
    np.random.seed(23)
    
    # Initialize weights and class_weights
    for category in model_inputs.values():
        category["class_weight"] = np.random.random()
        category["weights"] = np.random.random(len(category["weights"]))
        category["weights"] = normalize_weights(category["weights"])

    # Perform gradient descent
    for epoch in range(num_iterations):
        # Calculate gradients
        gradients = {}
        for category, category_data in model_inputs.items():
            gradients[category] = {
                "class_weight": 0.0,
                "weights": np.zeros_like(category_data["weights"])
            }

        # Calculate loss and gradients
        loss = calculate_loss(model_inputs)
        for category, category_data in model_inputs.items():
            # Calculate gradient for class_weight
            category_data["class_weight"] += 0.0001
            gradients[category]["class_weight"] = (calculate_loss(model_inputs) - loss) / 0.0001
            category_data["class_weight"] -= 0.0001

            # Calculate gradients for weights
            for i in range(len(category_data["weights"])):
                category_data["weights"][i] += 0.0001
                gradients[category]["weights"][i] = (calculate_loss(model_inputs) - loss) / 0.0001
                category_data["weights"][i] -= 0.0001

        # Update weights and class_weights
        for category, category_data in model_inputs.items():
            category_data["class_weight"] -= learning_rate * gradients[category]["class_weight"]
            category_data["weights"] -= learning_rate * gradients[category]["weights"]
            category_data["weights"] = normalize_weights(category_data["weights"])

        # Normalize class_weights
        class_weights = [category_data["class_weight"] for category_data in model_inputs.values()]
        normalized_class_weights = normalize_weights(class_weights)
        for category, weight in zip(model_inputs.keys(), normalized_class_weights):
            model_inputs[category]["class_weight"] = weight
        
        if epoch % 100 == 0:
            print(f"Epoch {epoch}: Loss = {loss:.4f}")
        
    return model_inputs

In [27]:
# Train the model
trained_model_inputs = train_model(model_inputs, learning_rate=0.1, num_iterations=300)

# Print the optimized weights and class_weights
for category, category_data in trained_model_inputs.items():
    print(f"Category: {category}")
    print(f"Class Weight: {category_data['class_weight']}")
    print(f"Weights: {category_data['weights']}")
    print()

Epoch 0: Loss = 0.2761
Epoch 100: Loss = 0.2593
Epoch 200: Loss = 0.2593
Category: profitability
Class Weight: 0.367213686010887
Weights: [1.0]

Category: leverage_coverage
Class Weight: 0.614510015986914
Weights: [0.2373745964555163, 0.18580497915793837, 0.5768204243865455]

Category: efficiency
Class Weight: 0.018276298002199015
Weights: [0.3883666310182689, 0.6116333689817312]



In [4]:
import numpy as np
from scipy.optimize import minimize

def calculate_loss(model_inputs):
    yhat = []
    for company in features.index:
        ratios = features.loc[company][model_metrics].to_dict()
        model = CreditRatingCalculator(model_inputs)
        model.calculate_credit_rating(ratios)
        credit_score = model.credit_score
        yhat.append(credit_score)
    y_true = targets['numeric_rating']
    yhat = np.round(yhat, 1)
    loss = mean_absolute_percentage_error(yhat, y_true)
    return loss

def normalize_weights(weights):
    total = sum(weights)
    return [weight / total for weight in weights]

def train_model(model_inputs, learning_rate=0.01, num_iterations=5000):
    np.random.seed(23)
    
    # Initialize weights and class_weights
    for category in model_inputs.values():
        category["class_weight"] = np.random.random()
        category["weights"] = np.random.random(len(category["weights"]))
        category["weights"] = normalize_weights(category["weights"])
    
    # Define the objective function for optimization
    def objective(params):
        idx = 0
        for category in model_inputs.values():
            category["class_weight"] = params[idx]
            idx += 1
            category["weights"] = params[idx:idx+len(category["weights"])]
            idx += len(category["weights"])
        return calculate_loss(model_inputs)
    
    # Define the bounds for optimization
    bounds = []
    for category in model_inputs.values():
        bounds.append((0, 1))  # Class weight bounds
        bounds.extend([(0, 1)] * len(category["weights"]))  # Weight bounds
    
    # Perform optimization using L-BFGS-B
    initial_params = []
    for category in model_inputs.values():
        initial_params.append(category["class_weight"])
        initial_params.extend(category["weights"])
    
    result = minimize(objective, initial_params, method='L-BFGS-B', bounds=bounds, options={'maxiter': num_iterations})
    print(result)
    
    # Update the optimized weights and class_weights
    optimized_params = result.x
    idx = 0
    for category in model_inputs.values():
        category["class_weight"] = optimized_params[idx]
        idx += 1
        category["weights"] = optimized_params[idx:idx+len(category["weights"])]
        idx += len(category["weights"])
    
    return model_inputs

In [5]:
# Train the model
trained_model_inputs = train_model(model_inputs, learning_rate=0.01, num_iterations=3000)

# Print the optimized weights and class_weights
for category, category_data in trained_model_inputs.items():
    print(f"Category: {category}")
    print(f"Class Weight: {category_data['class_weight']}")
    print(f"Weights: {category_data['weights']}")
    print()

NameError: name 'model_inputs' is not defined

In [None]:
def calculate_loss(model_inputs):
    yhat = []
    for company in features.index:
        ratios = features.loc[company][model_metrics].to_dict()
        model = CreditRatingCalculator(model_inputs)
        model.calculate_credit_rating(ratios)
        credit_score = model.credit_score
        yhat.append(credit_score)
    y_true = targets['numeric_rating']
    yhat = np.round(yhat, 1)
    loss = mean_absolute_percentage_error(yhat, y_true)
    return loss

In [28]:
calculate_loss(trained_model_inputs)

0.2592912997321396

In [14]:
def train_model(model_inputs, learning_rate=0.01, num_iterations=5000):
    np.random.seed(23)
    
    # Initialize weights and class_weights
    for category in model_inputs.values():
        category["class_weight"] = np.random.random()
        category["weights"] = np.random.random(len(category["weights"]))
        category["weights"] = normalize_weights(category["weights"])
    
    # Define the objective function for optimization
    def objective(params):
        idx = 0
        for category in model_inputs.values():
            category["class_weight"] = params[idx]
            idx += 1
            category["weights"] = params[idx:idx+len(category["weights"])]
            idx += len(category["weights"])
        return calculate_loss(model_inputs)
    
    # Define the bounds for optimization
    bounds = []
    for category in model_inputs.values():
        bounds.append((0, 1))  # Class weight bounds
        bounds.extend([(0, 1)] * len(category["weights"]))  # Weight bounds
    
    # Perform optimization using L-BFGS-B
    initial_params = []
    for category in model_inputs.values():
        initial_params.append(category["class_weight"])
        initial_params.extend(category["weights"])
    
    result = minimize(objective, initial_params, method='L-BFGS-B', bounds=bounds, options={'maxiter': num_iterations})
    
    # Update the optimized weights and class_weights
    optimized_params = result.x
    idx = 0
    for category in model_inputs.values():
        category["class_weight"] = optimized_params[idx]
        idx += 1
        category["weights"] = optimized_params[idx:idx+len(category["weights"])]
        idx += len(category["weights"])
    
    # Calculate and print the final error
    final_error = calculate_loss(model_inputs)
    print(f"Final Error: {final_error:.4f}")
    
    return model_inputs

In [110]:
def calculate_loss(model_inputs):
    yhat = []
    for company in features.index:
        ratios = features.loc[company][model_metrics].to_dict()    
        model = CreditRatingCalculator(model_inputs)
        model.calculate_credit_rating(ratios)

        credit_score = model.credit_score
        credit_rating = model.credit_rating

        yhat.append(credit_score)

    y_true = targets['numeric_rating']
    yhat = np.round(yhat, 1)
    loss = mean_absolute_percentage_error(yhat, y_true)
    print(y_true.values)
    print(yhat)
    return loss

In [111]:
calculate_loss(trained_model_inputs)

[5.5 5.5 4.5 6.5 4.5 5.5 6.5 4.5 6.5 6.5 7.5 5.5 5.5 6.5 5.5 5.5 6.5 6.5
 5.5 6.5 5.5 6.5 5.5 6.5 6.5 6.5 6.5 6.5 5.5 8.5 6.5 6.5 6.5 6.5 6.5 6.5
 6.5 6.5 6.5 6.5 6.5 6.5 6.5 6.5 6.5 6.5 5.5 6.5 5.5 6.5 6.5 6.5 6.5 6.5
 6.5 6.5 6.5 5.5 6.5 6.5 7.5 6.5 6.5 5.5 5.5 6.5 6.5 5.5 6.5 6.5]
[9.1 3.7 6.9 9.3 2.6 3.5 9.4 5.8 6.8 0.2 3.5 3.7 4.  0.2 3.5 9.2 0.2 7.
 0.2 3.8 3.5 0.2 3.6 9.3 9.2 9.2 9.2 3.7 7.  9.3 0.2 5.7 9.  9.  9.4 2.4
 3.8 3.8 4.6 8.1 8.  9.2 4.6 4.6 9.1 6.9 9.3 4.7 4.  6.9 9.1 3.8 3.7 3.7
 3.7 0.3 4.6 4.1 9.2 6.9 6.9 9.  6.9 4.7 2.5 4.2 3.7 4.7 4.7 4.2]


3.305207936385858

In [141]:
def get_buckets(min_val, max_val, lower_is_better=False, num_buckets=9):
    """
    Generates optimized buckets based on min, max values, desired number of buckets, and whether lower values are better.

    Args:
        min_val (float): The minimum value.
        max_val (float): The maximum value.
        num_buckets (int, optional): Number of buckets. Defaults to 9.
        lower_is_better (bool, optional): True if lower values are better, else False. Defaults to False.

    Returns:
        list: List of tuples (start, end) representing each bucket's range.
    """
    min_val, max_val = (max_val, min_val) if lower_is_better else (min_val, max_val)
    interval = (max_val - min_val) / (num_buckets - 1)
    buckets = [(round(min_val + i * interval, 2), round(min_val + (i + 1) * interval, 2)) for i in range(num_buckets - 1)]
    buckets.append((round(max_val - interval, 2), max_val))
    return list(reversed(buckets)) if lower_is_better else buckets


# Redefine the values and number of buckets for clarity
min_val = -50
max_val = 50

# Generate the optimized buckets
buckets_list = get_buckets(min_val,  max_val)
print(buckets_list)

[(-50.0, -37.5), (-37.5, -25.0), (-25.0, -12.5), (-12.5, 0.0), (0.0, 12.5), (12.5, 25.0), (25.0, 37.5), (37.5, 50.0), (37.5, 50)]


In [144]:
max_val = features[model_metrics].max()
min_val = features[model_metrics].min()

In [155]:
for metric in model_metrics:
    buckets = get_buckets(min_val.loc[metric].round(0), max_val.loc[metric].round(0))
    print(metric, buckets)

oper_margin [(-58.0, -42.75), (-42.75, -27.5), (-27.5, -12.25), (-12.25, 3.0), (3.0, 18.25), (18.25, 33.5), (33.5, 48.75), (48.75, 64.0), (48.75, 64.0)]
tot_debt_to_tot_eqy [(4.0, 412.62), (412.62, 821.25), (821.25, 1229.88), (1229.88, 1638.5), (1638.5, 2047.12), (2047.12, 2455.75), (2455.75, 2864.38), (2864.38, 3273.0), (2864.38, 3273.0)]
tot_debt_to_ebitda [(0.0, 7.62), (7.62, 15.25), (15.25, 22.88), (22.88, 30.5), (30.5, 38.12), (38.12, 45.75), (45.75, 53.38), (53.38, 61.0), (53.38, 61.0)]
ebitda_to_tot_int_exp [(-1.0, 2.12), (2.12, 5.25), (5.25, 8.38), (8.38, 11.5), (11.5, 14.62), (14.62, 17.75), (17.75, 20.88), (20.88, 24.0), (20.88, 24.0)]
return_on_asset [(-24.0, -18.38), (-18.38, -12.75), (-12.75, -7.12), (-7.12, -1.5), (-1.5, 4.12), (4.12, 9.75), (9.75, 15.38), (15.38, 21.0), (15.38, 21.0)]
asset_turnover [(0.0, 0.5), (0.5, 1.0), (1.0, 1.5), (1.5, 2.0), (2.0, 2.5), (2.5, 3.0), (3.0, 3.5), (3.5, 4.0), (3.5, 4.0)]
