# **Predictive Default Risk Assessor V.01**

# TODO

* Base model 
* Comparison
* Specialised
* For small entities - Examples?
* Backtest - All sectors 
* Understanding the model across all sectors/industries
* Any markets - consumer goods, industries
* UI last step after backtesting

In [1]:
import json

import numpy as np
import pandas as pd

from collections import namedtuple
from sklearn.metrics import mean_squared_error, root_mean_squared_error, accuracy_score, mean_absolute_percentage_error
from dataclasses import dataclass
from quantstats import * 
from hellocredit import *
from hellocredit.utils import get_rating_meta
extend_pandas()

In [4]:
get_rating_meta("Rating", "A")

{'score_range': 4.5,
 'rating': 'A',
 'description': 'Issuers assessed A are judged to have upper-medium-grade intrinsic, or standalone, financial strength, and thus subject to low credit risk absent any possibility of extraordinary support from an affiliate or a government.',
 'probability_of_default': 0.1}

In [2]:
MAPPED_RATINGS = [
    ("Aaa", 2.5),
    ("Aa", 3.5),
    ("A", 4.5),
    ("Baa", 5.5),
    ("Ba", 6.5),
    ("B", 7.5),
    ("Caa", 8.5),
    ("Ca", 9.5),
    ("C", float("inf")),
]

In [3]:
with open("metrics.json", "r") as f:
    metrics = json.load(f)

In [28]:
nested_dict = get_expected_metrics(get_nested_dict("creditwatch_large_company_template.xlsx"))

In [29]:
def calculate_credit_rating(metrics, ratios):
    calculation_details = {"metrics": {}}

    def determine_credit_rating(score):
        for rating, threshold in MAPPED_RATINGS:
            if score <= threshold:
                return rating
        return len(MAPPED_RATINGS) // 2  # Return the middile rating if no threshold is met

    def calculate_metric_score(value, thresholds, lower_is_better):
        for score, (lower, upper) in enumerate(thresholds, start=1):
            if lower_is_better and value <= upper or not lower_is_better and value >= lower:
                return score
        return len(thresholds) // 2

    def calculate_category_score(category, category_data):
        category_ratios = ratios[category]
        metric_weights = category_data["metric_weights"]
        total_weighted_score = 0

        for metric_name, metric_data in category_data["metrics"].items():
            value = category_ratios[metric_name]
            score = calculate_metric_score(value, metric_data["thresholds"], metric_data["lower_is_better"])
            
            weight = metric_weights[metric_name]
            weighted_score = score * weight
            
            rating = determine_credit_rating(score)
            total_weighted_score += weighted_score
            calculation_details["metrics"][metric_name] = {
                "category": category,
                "value": value,
                "score": score,
                "weight": weight,
                "weighted_score": weighted_score,
                "rating": rating,
            }
        return total_weighted_score

    scores = {category: calculate_category_score(category, category_data)
              for category, category_data in metrics.items()}

    total_weighted_score = sum(
        scores[category] * category_data["class_weight"]
        for category, category_data in metrics.items()
    )
    total_weight = sum(category_data["class_weight"] for category_data in metrics.values())
    credit_score = total_weighted_score / total_weight
    credit_rating = determine_credit_rating(credit_score)

    calculation_details.update({
        "scores": scores,
        "credit_score": credit_score,
        "credit_rating": credit_rating,
    })
    
    return calculation_details

In [None]:
p = calculate_credit_rating(metrics, nested_dict)

In [5]:
def get_expected_metrics(data, n=100):
    return {
        category: {metric: sum(values[-n:]) / min(len(values), n) 
                   for metric, values in metrics.items()}
        for category, metrics in data.items()
    }


def get_nested_dict(data):
    nested_dict = {}

    for (category, metric), values in data.iterrows():
        if category not in nested_dict:
            nested_dict[category] = {}
        nested_dict[category][metric] = values.tolist()

    return nested_dict


def get_nested_dict(file_path):
    
    data = pd.read_excel(file_path, index_col=[0, 1])
    nested_dict = {}

    for (category, metric), values in data.iterrows():
        if category not in nested_dict:
            nested_dict[category] = {}
        nested_dict[category][metric] = values.tolist()

    return nested_dict


def get_period_metrics(data):
    n = len(data['leverage_coverage_metrics']['debt_to_equity'])
    return {
        i: {
            category: {metric: values[i] 
            for metric, values in metrics.items()}
            for category, metrics in data.items()
        }
        for i in range(n)
    }

def bayesian_ridge_transform(data):
    list_length = len(next(iter(next(iter(data.values())).values())))
    return [
        {
            category: {
                metric: values[i] for metric, values in metrics.items()
            } for category, metrics in data.items()
        } for i in range(list_length)
    ]


def bayesian_ridge_model(metrics, periods=1, look_back_periods=5, max_iter=300, tol=1e-3):
    import numpy as np
    from sklearn.linear_model import BayesianRidge

    predictions = {}
    
    for metric_group, values_dict in metrics.items():
        predictions[metric_group] = {}
        for metric, values in values_dict.items():
            X = np.arange(len(values)).reshape(-1, 1)
            
            model = BayesianRidge(max_iter=max_iter, tol=tol).fit(X, values)
            
            next_periods = np.arange(len(values), len(values) + periods).reshape(-1, 1)
            predictions[metric_group][metric] = model.predict(next_periods).tolist() if periods > 0 else list(values)
    return predictions



def bayesian_ridge_model(metrics, periods=1, look_back_periods=5, max_iter=300, tol=1e-3):
    import numpy as np
    from sklearn.linear_model import BayesianRidge
    predictions = {}
    periods, look_back_periods = abs(periods), abs(look_back_periods)
    
    for metric_group, values_dict in metrics.items():
        predictions[metric_group] = {}
        for metric, values in values_dict.items():
            
            recent_values = values[-look_back_periods:]
            X = np.arange(len(recent_values)).reshape(-1, 1)
            
            model = BayesianRidge(max_iter=max_iter, tol=tol).fit(X, recent_values)
            
            next_periods = np.arange(look_back_periods, look_back_periods + periods).reshape(-1, 1)
            predictions[metric_group][metric] = model.predict(next_periods).tolist() if periods > 0 else list(recent_values)
    return predictions

In [47]:
calculate_credit_rating(configuration, nested_dict)

{'metrics': {'ebitda_margin': {'category': 'profitability_metrics',
   'value': 11.411927777777777,
   'score': 7,
   'weight': 1.0,
   'weighted_score': 7.0,
   'rating': 'B'},
  'debt_to_equity': {'category': 'leverage_coverage_metrics',
   'value': 0.8838269444444443,
   'score': 4,
   'weight': 0.2,
   'weighted_score': 0.8,
   'rating': 'A'},
  'debt_to_ebitda': {'category': 'leverage_coverage_metrics',
   'value': 7.239883333333332,
   'score': 7,
   'weight': 0.4,
   'weighted_score': 2.8000000000000003,
   'rating': 'B'},
  'ebitda_to_interest_expense': {'category': 'leverage_coverage_metrics',
   'value': 4.300255555555556,
   'score': 5,
   'weight': 0.4,
   'weighted_score': 2.0,
   'rating': 'Baa'},
  'asset_turnover': {'category': 'efficiency_metrics',
   'value': 0.8485388888888887,
   'score': 8,
   'weight': 1.0,
   'weighted_score': 8.0,
   'rating': 'Caa'}},
 'scores': {'profitability_metrics': 7.0,
  'leverage_coverage_metrics': 5.6000000000000005,
  'efficiency_metr

In [39]:
nested_dict

{'leverage_coverage_metrics': {'debt_to_equity': 0.8838269444444443,
  'debt_to_ebitda': 7.239883333333332,
  'ebitda_to_interest_expense': 4.300255555555556},
 'efficiency_metrics': {'asset_turnover': 0.8485388888888887},
 'profitability_metrics': {'ebitda_margin': 11.411927777777777}}

In [41]:
configuration['profitability_metrics']

{'class_weight': 20,
 'metric_weights': {'ebitda_margin': 0.4,
  'total_assets': 0.3,
  'sales_growth': 0.3},
 'metrics': {'ebitda_margin': {'lower_is_better': False,
   'thresholds': [[40.0, inf],
    [35.0, 40.0],
    [30.0, 35.0],
    [25.0, 30.0],
    [20.0, 25.0],
    [15.0, 20.0],
    [10.0, 15.0],
    [5.0, 10.0],
    [-inf, 5.0]]},
  'total_assets': {'lower_is_better': False,
   'thresholds': [[500000000, inf],
    [100000000, 500000000],
    [50000000, 100000000],
    [10000000, 50000000],
    [5000000, 10000000],
    [1000000, 5000000],
    [500000, 1000000],
    [100000, 500000],
    [-inf, 100000]]},
  'sales_growth': {'lower_is_better': False,
   'thresholds': [[40.0, inf],
    [30.0, 40.0],
    [25.0, 30.0],
    [20.0, 25.0],
    [15.0, 20.0],
    [10.0, 15.0],
    [5.0, 10.0],
    [0.0, 5.0],
    [-inf, 0.0]]}}}

In [33]:
configuration.keys()

dict_keys(['profitability_metrics', 'leverage_coverage_metrics', 'efficiency_metrics'])

In [34]:
configuration['efficiency_metrics']['metric_weights']

{'asset_turnover': 0.5,
 'inventory_to_cost_of_sales': 0.3,
 'cash_to_assets': 0.2}

In [5]:
rating_description_dict = {
    "Aaa": "Issuers assessed Aaa are judged to have the highest intrinsic, or standalone, financial strength, and thus subject to the lowest level of credit risk absent any possibility of extraordinary support from an affiliate or a government.",
    "Aa": "Issuers assessed Aa are judged to have high intrinsic, or standalone, financial strength, and thus subject to very low credit risk absent any possibility of extraordinary support from an affiliate or a government.",
    "A": "Issuers assessed A are judged to have upper-medium-grade intrinsic, or standalone, financial strength, and thus subject to low credit risk absent any possibility of extraordinary support from an affiliate or a government.",
    "Baa": "Issuers assessed Baa are judged to have medium-grade intrinsic, or standalone, financial strength, and thus subject to moderate credit risk and, as such, may possess certain speculative credit elements absent any possibility of extraordinary support from an affiliate or a government.",
    "Ba": "Issuers assessed Ba are judged to have speculative intrinsic, or standalone, financial strength, and are subject to substantial credit risk absent any possibility of extraordinary support from an affiliate or a government.",
    "B": "Issuers assessed B are judged to have speculative intrinsic, or standalone, financial strength, and are subject to high credit risk absent any possibility of extraordinary support from an affiliate or a government.",
    "Caa": "Issuers assessed Caa are judged to have speculative intrinsic, or standalone, financial strength, and are subject to very high credit risk absent any possibility of extraordinary support from an affiliate or a government.",
    "Ca": "Issuers assessed Ca have highly speculative intrinsic, or standalone, financial strength, and are likely to be either in, or very near, default, with some prospect for recovery of principal and interest; or, these issuers have avoided default or are expected to avoid default through the provision of extraordinary support from an affiliate or a government.",
    "C": "Issuers assessed C are typically in default, with little prospect for recovery of principal or interest; or, these issuers are benefiting from a government or affiliate support but are likely to be liquidated over time; without support there would be little prospect for recovery of principal or interest."
}

In [6]:
MAPPED_RATINGS = [
    ("Aaa", 2.5),
    ("Aa", 3.5),
    ("A", 4.5),
    ("Baa", 5.5),
    ("Ba", 6.5),
    ("B", 7.5),
    ("Caa", 8.5),
    ("Ca", 9.5),
    ("C", float("inf")),
]



In [7]:
RATING_META = {
    "Score Range": ["≤ 2.5", "≤ 3.5", "≤ 4.5", "≤ 5.5", "≤ 6.5", "≤ 7.5", "≤ 8.5", "≤ 9.5", "> 9.5"],
    "Rating": ["Aaa", "Aa", "A", "Baa", "Ba", "B", "Caa", "Ca", "C"],
    "Description": [
        "Issuers assessed Aaa are judged to have the highest intrinsic, or standalone, financial strength, and thus subject to the lowest level of credit risk absent any possibility of extraordinary support from an affiliate or a government.",
        "Issuers assessed Aa are judged to have high intrinsic, or standalone, financial strength, and thus subject to very low credit risk absent any possibility of extraordinary support from an affiliate or a government.",
        "Issuers assessed A are judged to have upper-medium-grade intrinsic, or standalone, financial strength, and thus subject to low credit risk absent any possibility of extraordinary support from an affiliate or a government.",
        "Issuers assessed Baa are judged to have medium-grade intrinsic, or standalone, financial strength, and thus subject to moderate credit risk and, as such, may possess certain speculative credit elements absent any possibility of extraordinary support from an affiliate or a government.",
        "Issuers assessed Ba are judged to have speculative intrinsic, or standalone, financial strength, and are subject to substantial credit risk absent any possibility of extraordinary support from an affiliate or a government.",
        "Issuers assessed B are judged to have speculative intrinsic, or standalone, financial strength, and are subject to high credit risk absent any possibility of extraordinary support from an affiliate or a government.",
        "Issuers assessed Caa are judged to have speculative intrinsic, or standalone, financial strength, and are subject to very high credit risk absent any possibility of extraordinary support from an affiliate or a government.",
        "Issuers assessed Ca have highly speculative intrinsic, or standalone, financial strength, and are likely to be either in, or very near, default, with some prospect for recovery of principal and interest; or, these issuers have avoided default or are expected to avoid default through the provision of extraordinary support from an affiliate or a government.",
        "Issuers assessed C are typically in default, with little prospect for recovery of principal or interest; or, these issuers are benefiting from a government or affiliate support but are likely to be liquidated over time; without support there would be little prospect for recovery of principal or interest."
    ],
    "Probability of Default": ["0.00%", "0.01%", "0.10%", "0.46%", "2.31%", "7.62%", "17.86%", "50.00%", "100.00%"]
}

def get_rating_meta(rating):

    index = RATING_META["Rating"].index(rating)
    result = {}
    for key, value in RATING_META.items():
        formatted_key = key.lower().replace(' ', '_')
        
        if key == "Probability of Default":
            result[formatted_key] = float(value[index].rstrip('%'))
        elif key == "Score Range":
            score_str = value[index].replace('≤', '').replace('>', '').strip()
            result[formatted_key] = float(score_str)
        else:
            result[formatted_key] = value[index]

    return result

# Example usage
target_rating = "C"  # Using "C" to test the '> 9.5' case
result = get_rating_meta(target_rating)
print(result)

{'score_range': 9.5, 'rating': 'C', 'description': 'Issuers assessed C are typically in default, with little prospect for recovery of principal or interest; or, these issuers are benefiting from a government or affiliate support but are likely to be liquidated over time; without support there would be little prospect for recovery of principal or interest.', 'probability_of_default': 100.0}


In [8]:
def load_config(company_sector, company_size):
    sector = company_sector.lower()
    file_path = "metrics_large.json" if sector == "large" else "metrics.json"
    
    with open(file_path, "r") as f:
        return json.load(f)

In [9]:
configuration = json.load(open("metrics_large.json", "r"))

In [39]:
company_name = "Banana Capital Ltd"
company_sector = "Corporates"
company_size = "Small"
excel_file_path = "creditwatch_small_medium_company_template.xlsx"
work_directory = r"C:\Users\103763\Projects\HELLO_CREDIT\temp\9397831824fe4796"

configuration = load_config(company_sector, company_size)
input_dict = {
    "work_directory": work_directory,
    "file_path": excel_file_path,
    "company_meta": {
        "company_name": company_name,
        "company_size": company_size,
        "company_sector": company_sector,
    },
    "probabilistic_model": {
        "periods": 2, 
        "look_back_periods": 5, 
        "max_iter": 300, 
        "tol": 1e-3
    },
    "configuration": configuration
}

factor_weights = {
    weight: input_dict["configuration"].get(weight)["class_weight"] 
    for weight in input_dict["configuration"]
}

In [32]:
factor_weights

{'profitability_metrics': 20,
 'leverage_coverage_metrics': 25,
 'efficiency_metrics': 15}

In [40]:
nested_dict = get_nested_dict(input_dict["file_path"])
company_expected_metrics = get_expected_metrics(nested_dict)


In [43]:
nested_dict

{'leverage_coverage_metrics': {'debt_to_equity': [0.694277,
   0.5414800000000001,
   0.513237,
   0.582016,
   0.51855,
   0.17954699999999998,
   0.851673,
   0.654936,
   0.6750069999999999,
   0.705304,
   0.6780240000000001,
   0.7469750000000001,
   0.7660009999999999,
   0.7985859999999999,
   0.733993,
   0.351504,
   1.346962,
   4.570813,
   16.209048,
   5.61958175,
   5.61958175,
   5.61958175,
   5.61958175],
  'debt_to_ebitda': [3.3666,
   2.9485,
   2.4745,
   2.2575,
   1.9345,
   0.74,
   3.5469,
   3.0054,
   3.0252,
   2.9993,
   3.3896,
   4.8311,
   4.2058,
   4.7332,
   3.8616,
   3.2351,
   12.9589,
   66.8042,
   16.5446,
   22.9234,
   25.824099999999998,
   25.944,
   9.7695],
  'ebitda_to_interest_expense': [3.8652,
   4.8199,
   7.0297,
   6.4572,
   6.4745,
   5.7828,
   4.5832,
   3.6629,
   3.7094,
   3.0118,
   3.0072,
   3.0307,
   3.9186,
   4.3901,
   5.2548,
   6.1303,
   1.8618,
   0.4145,
   1.5622,
   0.4593,
   -0.1847,
   0.4896,
   1.1713],
  '

In [29]:
company_expected_metrics

{'leverage_coverage_metrics': {'debt_to_equity': 0.8838269444444443,
  'debt_to_ebitda': 7.239883333333332,
  'ebitda_to_interest_expense': 4.300255555555556},
 'efficiency_metrics': {'asset_turnover': 0.8485388888888887},
 'profitability_metrics': {'ebitda_margin': 11.411927777777777}}

In [33]:
[configuration[metric]["metrics"] for metric in configuration]

[{'ebitda_margin': {'lower_is_better': False,
   'thresholds': [[40.0, inf],
    [35.0, 40.0],
    [30.0, 35.0],
    [25.0, 30.0],
    [20.0, 25.0],
    [15.0, 20.0],
    [10.0, 15.0],
    [5.0, 10.0],
    [-inf, 5.0]]},
  'total_assets': {'lower_is_better': False,
   'thresholds': [[500000000, inf],
    [100000000, 500000000],
    [50000000, 100000000],
    [10000000, 50000000],
    [5000000, 10000000],
    [1000000, 5000000],
    [500000, 1000000],
    [100000, 500000],
    [-inf, 100000]]},
  'sales_growth': {'lower_is_better': False,
   'thresholds': [[40.0, inf],
    [30.0, 40.0],
    [25.0, 30.0],
    [20.0, 25.0],
    [15.0, 20.0],
    [10.0, 15.0],
    [5.0, 10.0],
    [0.0, 5.0],
    [-inf, 0.0]]}},
 {'debt_to_equity': {'lower_is_better': True,
   'thresholds': [[-inf, 0.25],
    [0.25, 0.5],
    [0.5, 0.75],
    [0.75, 1.0],
    [1.0, 1.5],
    [1.5, 2.0],
    [2.0, 3.0],
    [3.0, 5.0],
    [5.0, inf]]},
  'debt_to_ebitda': {'lower_is_better': True,
   'thresholds': [[-inf, 

In [37]:
[configuration[metric]["metrics"] for metric in configuration]

[{'ebitda_margin': {'lower_is_better': False,
   'thresholds': [[40.0, inf],
    [35.0, 40.0],
    [30.0, 35.0],
    [25.0, 30.0],
    [20.0, 25.0],
    [15.0, 20.0],
    [10.0, 15.0],
    [5.0, 10.0],
    [-inf, 5.0]]},
  'total_assets': {'lower_is_better': False,
   'thresholds': [[500000000, inf],
    [100000000, 500000000],
    [50000000, 100000000],
    [10000000, 50000000],
    [5000000, 10000000],
    [1000000, 5000000],
    [500000, 1000000],
    [100000, 500000],
    [-inf, 100000]]},
  'sales_growth': {'lower_is_better': False,
   'thresholds': [[40.0, inf],
    [30.0, 40.0],
    [25.0, 30.0],
    [20.0, 25.0],
    [15.0, 20.0],
    [10.0, 15.0],
    [5.0, 10.0],
    [0.0, 5.0],
    [-inf, 0.0]]}},
 {'debt_to_equity': {'lower_is_better': True,
   'thresholds': [[-inf, 0.25],
    [0.25, 0.5],
    [0.5, 0.75],
    [0.75, 1.0],
    [1.0, 1.5],
    [1.5, 2.0],
    [2.0, 3.0],
    [3.0, 5.0],
    [5.0, inf]]},
  'debt_to_ebitda': {'lower_is_better': True,
   'thresholds': [[-inf, 

In [355]:
from functools import partial
from dataclasses import field

@dataclass
class HelloCredit:
    input_dict: dict = field(default_factory=dict)
    
    def __post_init__(self):
        self.nested_dict = get_nested_dict(self.input_dict["file_path"])
        self.company_period_metrics = get_period_metrics(self.nested_dict)
        self.company_expected_metrics = get_expected_metrics(self.nested_dict)
        self.compute_rating = partial(calculate_credit_rating, metrics=self.input_dict["configuration"])
        self.work_dir = self.input_dict["work_directory"]
            
    def update_input_dict(self, input_dict: dict) -> dict:  
        def deep_update(d, u):
            for k, v in u.items():
                if isinstance(v, dict):
                    d[k] = deep_update(d.get(k, {}), v)
                else:
                    d[k] = v
            return d

        self.input_dict = deep_update(self.input_dict, input_dict)
    
    
    def run_function(self) -> None:
        # Expected and Single Period Calcs
        calculator_output = self.compute_rating(ratios=self.company_expected_metrics)
        rating_meta = get_rating_meta(calculator_output["credit_rating"])
        
        calculator_periods_output = {
            period: self.compute_rating(ratios=self.company_period_metrics[period])
            for period in self.company_period_metrics
        }
             
        # Bayesian Model
        model_output = bayesian_ridge_model(self.nested_dict, **self.input_dict["probabilistic_model"])
        model_output_transform = bayesian_ridge_transform(model_output)
        model_output_computed_rating = {
            period: self.compute_rating(ratios=model_output_transform[period]) 
            for period, metrics in enumerate(model_output_transform)
        }
        
        bayesian_model_output = {
            "model_output": model_output,
            "model_output_transform": model_output_transform,
            "computed_rating": model_output_computed_rating,
        }
        
        self.output_dict = {
            "rating_meta": rating_meta,
            "company_expected_metrics": self.company_expected_metrics,
            "company_period_metrics": self.company_period_metrics,
            "calculator_output": calculator_output,
            "calculator_periods_output": calculator_periods_output,
            "bayesian_model_output": bayesian_model_output,
        }
        
        
        files_to_dump = {
            "output_dict.json": self.output_dict,
            "input_dict.json": self.input_dict
        }

        for filename, data in files_to_dump.items():
            file_path = os.path.join(self.work_dir, filename)
            with open(file_path, "w") as f:
                json.dump(data, f)
        print("Files Dumped")
        return self.output_dict

In [18]:
MAPPED_RATINGS

[('Aaa', 2.5),
 ('Aa', 3.5),
 ('A', 4.5),
 ('Baa', 5.5),
 ('Ba', 6.5),
 ('B', 7.5),
 ('Caa', 8.5),
 ('Ca', 9.5),
 ('C', inf)]

In [320]:
from dataclasses import field



@dataclass
class HelloCredit:
    file_path: str
    company_meta: dict = field(default_factory=dict)
    
    
    def __post_init__(self):        
        self.file_path = self.file_path or "data.xlsx"
        
        
        
        self.metrics_path = self.metrics_path or "metrics.json"
        
        with open(self.metrics_path, "r") as f:
            self.metrics = json.load(f)
            
        self.class_weights = {m: self.metrics[m]["class_weight"] for m in self.metrics}
        self.dataframe = pd.read_excel(self.file_path, index_col=[0, 1])
        self.nested_dict = get_nested_dict(self.dataframe)
        self.company_period_metrics = get_period_metrics(self.nested_dict)
        self.company_expected_metrics = get_expected_metrics(self.nested_dict)
        
        self.input_dict = {
            "company_name": self.company_meta["company_name"],
            "calculator_model": {
                "sector": self.company_meta["company_sector"], 
                "size": self.company_meta["company_size"]
            },
            "factor_weights_model": self.class_weights,
            "probabilistic_model": {"periods": 1, "look_back_periods": 5, "max_iter": 300, "tol": 1e-3}
        }

    
    def update_input_dict(self, input_dict: dict) -> dict:  
        def deep_update(d, u):
            for k, v in u.items():
                if isinstance(v, dict):
                    d[k] = deep_update(d.get(k, {}), v)
                else:
                    d[k] = v
            return d

        self.input_dict = deep_update(self.input_dict, input_dict)
    
    
    def update_output_dict(self):
        calculator_output = calculate_credit_rating(self.metrics, self.company_expected_metrics)

        # Single Period Calcs
        calculator_periods_output = {}
        for period in self.company_period_metrics:
            calculator_periods_output[period] = calculate_credit_rating(self.metrics, self.company_period_metrics[period])
             
        # Bayesian Model
        bayesian_model_output = {}
        model_output = bayesian_ridge_model(self.nested_dict, **self.input_dict["probabilistic_model"])
        out_of_sample = bayesian_ridge_transform(model_output)
        for period, metrics in enumerate(out_of_sample):
            bayesian_model_output[period] = calculate_credit_rating(self.metrics, out_of_sample[period])
            

        # Rating Description
        rating_description = rating_description_dict[calculator_output["credit_rating"]]

        output_dict = {
            "company_name": self.input_dict["company_name"],
            "rating_description": rating_description,
            "company_expected_metrics": self.company_expected_metrics,
            "company_period_metrics": self.company_period_metrics,
            "calculator_output": calculator_output,
            "calculator_periods_output": calculator_periods_output,
            "bayesian_model_output": bayesian_model_output,
            "metrics": self.metrics
        }
        return output_dict

In [24]:
@dataclass
class HelloCredit:
    file_path: str = None
    metrics_path: str = None
    
    def __post_init__(self):        
        self.file_path = self.file_path or "data.xlsx"
        self.metrics_path = self.metrics_path or "metrics.json"
        
        with open(self.metrics_path, "r") as f:
            self.metrics = json.load(f)
            
        self.class_weights = {m: self.metrics[m]["class_weight"] for m in self.metrics}
        self.dataframe = pd.read_excel(self.file_path, index_col=[0, 1])
        self.nested_dict = get_nested_dict(self.dataframe)
        self.company_period_metrics = get_period_metrics(self.nested_dict)
        self.company_expected_metrics = get_expected_metrics(self.nested_dict)
        self.input_dict = {
            "company_name": None,
            "calculator_model": {"sector": "Corporates", "size": "Small"},
            "factor_weights_model": self.class_weights,
            "probabilistic_model": {"periods": 1, "max_iter": 300, "tol": 1e-3}
        }

    
    def update_input_dict(self, input_dict: dict) -> dict:  
        def deep_update(d, u):
            for k, v in u.items():
                if isinstance(v, dict):
                    d[k] = deep_update(d.get(k, {}), v)
                else:
                    d[k] = v
            return d

        self.input_dict = deep_update(self.input_dict, input_dict)
    
    
    def update_output_dict(self):
        calculator = CreditRatingCalculator(self.metrics)
        calculator.calculate_credit_rating(self.company_expected_metrics)
        calculator_output = calculator.calculation_details

        # Single Period Calcs
        calculator_periods_output = {}
        for period in self.company_period_metrics:
            calculator = CreditRatingCalculator(self.metrics)
            calculator.calculate_credit_rating(self.company_period_metrics[period])
            calculator_periods_output[period] = calculator.calculation_details

        # Bayesian Model
        bayesian_model_output = {}
        
        model_output = bayesian_ridge_model(self.nested_dict, **self.input_dict["probabilistic_model"])
        out_of_sample = bayesian_ridge_transform(model_output)
        for period, metrics in enumerate(out_of_sample):
            calculator = CreditRatingCalculator(self.metrics)
            calculator.calculate_credit_rating(out_of_sample[period])
            bayesian_model_output[period] = calculator.calculation_details

        # Rating Description
        rating_description = rating_description_dict[calculator_output["credit_rating"]]

        output_dict = {
            "company_name": self.input_dict["company_name"],
            "rating_description": rating_description,
            "company_expected_metrics": self.company_expected_metrics,
            "company_period_metrics": self.company_period_metrics,
            "calculator_output": calculator_output,
            "calculator_periods_output": calculator_periods_output,
            "bayesian_model_output": bayesian_model_output,
            "metrics": self.metrics
        }
        return output_dict

In [25]:
m = HelloCredit("stienhoff_data - Copy - Copy.xlsx")

In [26]:
output = m.update_output_dict()

NameError: name 'CreditRatingCalculator' is not defined

In [112]:
with open("outputt.json", "w") as f:
    json.dump(output, f)

# MODEL TRAINING

In [26]:
def calculate_loss(model_inputs):
    yhat = []
    for company in features.index:
        ratios = features.loc[company][model_metrics].to_dict()    
        model = CreditRatingCalculator(model_inputs)
        model.calculate_credit_rating(ratios)

        credit_score = model.credit_score
        credit_rating = model.credit_rating

        yhat.append(credit_score)

    y_true = targets['numeric_rating']
    yhat = np.round(yhat, 1)
    loss = mean_absolute_percentage_error(yhat, y_true)
    return loss

def normalize_weights(weights):
    total = sum(weights)
    return [weight / total for weight in weights]

def train_model(model_inputs, learning_rate=0.01, num_iterations=1000):
    
    np.random.seed(23)
    
    # Initialize weights and class_weights
    for category in model_inputs.values():
        category["class_weight"] = np.random.random()
        category["weights"] = np.random.random(len(category["weights"]))
        category["weights"] = normalize_weights(category["weights"])

    # Perform gradient descent
    for epoch in range(num_iterations):
        # Calculate gradients
        gradients = {}
        for category, category_data in model_inputs.items():
            gradients[category] = {
                "class_weight": 0.0,
                "weights": np.zeros_like(category_data["weights"])
            }

        # Calculate loss and gradients
        loss = calculate_loss(model_inputs)
        for category, category_data in model_inputs.items():
            # Calculate gradient for class_weight
            category_data["class_weight"] += 0.0001
            gradients[category]["class_weight"] = (calculate_loss(model_inputs) - loss) / 0.0001
            category_data["class_weight"] -= 0.0001

            # Calculate gradients for weights
            for i in range(len(category_data["weights"])):
                category_data["weights"][i] += 0.0001
                gradients[category]["weights"][i] = (calculate_loss(model_inputs) - loss) / 0.0001
                category_data["weights"][i] -= 0.0001

        # Update weights and class_weights
        for category, category_data in model_inputs.items():
            category_data["class_weight"] -= learning_rate * gradients[category]["class_weight"]
            category_data["weights"] -= learning_rate * gradients[category]["weights"]
            category_data["weights"] = normalize_weights(category_data["weights"])

        # Normalize class_weights
        class_weights = [category_data["class_weight"] for category_data in model_inputs.values()]
        normalized_class_weights = normalize_weights(class_weights)
        for category, weight in zip(model_inputs.keys(), normalized_class_weights):
            model_inputs[category]["class_weight"] = weight
        
        if epoch % 100 == 0:
            print(f"Epoch {epoch}: Loss = {loss:.4f}")
        
    return model_inputs

In [27]:
# Train the model
trained_model_inputs = train_model(model_inputs, learning_rate=0.1, num_iterations=300)

# Print the optimized weights and class_weights
for category, category_data in trained_model_inputs.items():
    print(f"Category: {category}")
    print(f"Class Weight: {category_data['class_weight']}")
    print(f"Weights: {category_data['weights']}")
    print()

Epoch 0: Loss = 0.2761
Epoch 100: Loss = 0.2593
Epoch 200: Loss = 0.2593
Category: profitability
Class Weight: 0.367213686010887
Weights: [1.0]

Category: leverage_coverage
Class Weight: 0.614510015986914
Weights: [0.2373745964555163, 0.18580497915793837, 0.5768204243865455]

Category: efficiency
Class Weight: 0.018276298002199015
Weights: [0.3883666310182689, 0.6116333689817312]



In [4]:
import numpy as np
from scipy.optimize import minimize

def calculate_loss(model_inputs):
    yhat = []
    for company in features.index:
        ratios = features.loc[company][model_metrics].to_dict()
        model = CreditRatingCalculator(model_inputs)
        model.calculate_credit_rating(ratios)
        credit_score = model.credit_score
        yhat.append(credit_score)
    y_true = targets['numeric_rating']
    yhat = np.round(yhat, 1)
    loss = mean_absolute_percentage_error(yhat, y_true)
    return loss

def normalize_weights(weights):
    total = sum(weights)
    return [weight / total for weight in weights]

def train_model(model_inputs, learning_rate=0.01, num_iterations=5000):
    np.random.seed(23)
    
    # Initialize weights and class_weights
    for category in model_inputs.values():
        category["class_weight"] = np.random.random()
        category["weights"] = np.random.random(len(category["weights"]))
        category["weights"] = normalize_weights(category["weights"])
    
    # Define the objective function for optimization
    def objective(params):
        idx = 0
        for category in model_inputs.values():
            category["class_weight"] = params[idx]
            idx += 1
            category["weights"] = params[idx:idx+len(category["weights"])]
            idx += len(category["weights"])
        return calculate_loss(model_inputs)
    
    # Define the bounds for optimization
    bounds = []
    for category in model_inputs.values():
        bounds.append((0, 1))  # Class weight bounds
        bounds.extend([(0, 1)] * len(category["weights"]))  # Weight bounds
    
    # Perform optimization using L-BFGS-B
    initial_params = []
    for category in model_inputs.values():
        initial_params.append(category["class_weight"])
        initial_params.extend(category["weights"])
    
    result = minimize(objective, initial_params, method='L-BFGS-B', bounds=bounds, options={'maxiter': num_iterations})
    print(result)
    
    # Update the optimized weights and class_weights
    optimized_params = result.x
    idx = 0
    for category in model_inputs.values():
        category["class_weight"] = optimized_params[idx]
        idx += 1
        category["weights"] = optimized_params[idx:idx+len(category["weights"])]
        idx += len(category["weights"])
    
    return model_inputs

In [5]:
# Train the model
trained_model_inputs = train_model(model_inputs, learning_rate=0.01, num_iterations=3000)

# Print the optimized weights and class_weights
for category, category_data in trained_model_inputs.items():
    print(f"Category: {category}")
    print(f"Class Weight: {category_data['class_weight']}")
    print(f"Weights: {category_data['weights']}")
    print()

NameError: name 'model_inputs' is not defined

In [None]:
def calculate_loss(model_inputs):
    yhat = []
    for company in features.index:
        ratios = features.loc[company][model_metrics].to_dict()
        model = CreditRatingCalculator(model_inputs)
        model.calculate_credit_rating(ratios)
        credit_score = model.credit_score
        yhat.append(credit_score)
    y_true = targets['numeric_rating']
    yhat = np.round(yhat, 1)
    loss = mean_absolute_percentage_error(yhat, y_true)
    return loss

In [28]:
calculate_loss(trained_model_inputs)

0.2592912997321396

In [14]:
def train_model(model_inputs, learning_rate=0.01, num_iterations=5000):
    np.random.seed(23)
    
    # Initialize weights and class_weights
    for category in model_inputs.values():
        category["class_weight"] = np.random.random()
        category["weights"] = np.random.random(len(category["weights"]))
        category["weights"] = normalize_weights(category["weights"])
    
    # Define the objective function for optimization
    def objective(params):
        idx = 0
        for category in model_inputs.values():
            category["class_weight"] = params[idx]
            idx += 1
            category["weights"] = params[idx:idx+len(category["weights"])]
            idx += len(category["weights"])
        return calculate_loss(model_inputs)
    
    # Define the bounds for optimization
    bounds = []
    for category in model_inputs.values():
        bounds.append((0, 1))  # Class weight bounds
        bounds.extend([(0, 1)] * len(category["weights"]))  # Weight bounds
    
    # Perform optimization using L-BFGS-B
    initial_params = []
    for category in model_inputs.values():
        initial_params.append(category["class_weight"])
        initial_params.extend(category["weights"])
    
    result = minimize(objective, initial_params, method='L-BFGS-B', bounds=bounds, options={'maxiter': num_iterations})
    
    # Update the optimized weights and class_weights
    optimized_params = result.x
    idx = 0
    for category in model_inputs.values():
        category["class_weight"] = optimized_params[idx]
        idx += 1
        category["weights"] = optimized_params[idx:idx+len(category["weights"])]
        idx += len(category["weights"])
    
    # Calculate and print the final error
    final_error = calculate_loss(model_inputs)
    print(f"Final Error: {final_error:.4f}")
    
    return model_inputs

In [110]:
def calculate_loss(model_inputs):
    yhat = []
    for company in features.index:
        ratios = features.loc[company][model_metrics].to_dict()    
        model = CreditRatingCalculator(model_inputs)
        model.calculate_credit_rating(ratios)

        credit_score = model.credit_score
        credit_rating = model.credit_rating

        yhat.append(credit_score)

    y_true = targets['numeric_rating']
    yhat = np.round(yhat, 1)
    loss = mean_absolute_percentage_error(yhat, y_true)
    print(y_true.values)
    print(yhat)
    return loss

In [111]:
calculate_loss(trained_model_inputs)

[5.5 5.5 4.5 6.5 4.5 5.5 6.5 4.5 6.5 6.5 7.5 5.5 5.5 6.5 5.5 5.5 6.5 6.5
 5.5 6.5 5.5 6.5 5.5 6.5 6.5 6.5 6.5 6.5 5.5 8.5 6.5 6.5 6.5 6.5 6.5 6.5
 6.5 6.5 6.5 6.5 6.5 6.5 6.5 6.5 6.5 6.5 5.5 6.5 5.5 6.5 6.5 6.5 6.5 6.5
 6.5 6.5 6.5 5.5 6.5 6.5 7.5 6.5 6.5 5.5 5.5 6.5 6.5 5.5 6.5 6.5]
[9.1 3.7 6.9 9.3 2.6 3.5 9.4 5.8 6.8 0.2 3.5 3.7 4.  0.2 3.5 9.2 0.2 7.
 0.2 3.8 3.5 0.2 3.6 9.3 9.2 9.2 9.2 3.7 7.  9.3 0.2 5.7 9.  9.  9.4 2.4
 3.8 3.8 4.6 8.1 8.  9.2 4.6 4.6 9.1 6.9 9.3 4.7 4.  6.9 9.1 3.8 3.7 3.7
 3.7 0.3 4.6 4.1 9.2 6.9 6.9 9.  6.9 4.7 2.5 4.2 3.7 4.7 4.7 4.2]


3.305207936385858

In [141]:
def get_buckets(min_val, max_val, lower_is_better=False, num_buckets=9):
    """
    Generates optimized buckets based on min, max values, desired number of buckets, and whether lower values are better.

    Args:
        min_val (float): The minimum value.
        max_val (float): The maximum value.
        num_buckets (int, optional): Number of buckets. Defaults to 9.
        lower_is_better (bool, optional): True if lower values are better, else False. Defaults to False.

    Returns:
        list: List of tuples (start, end) representing each bucket's range.
    """
    min_val, max_val = (max_val, min_val) if lower_is_better else (min_val, max_val)
    interval = (max_val - min_val) / (num_buckets - 1)
    buckets = [(round(min_val + i * interval, 2), round(min_val + (i + 1) * interval, 2)) for i in range(num_buckets - 1)]
    buckets.append((round(max_val - interval, 2), max_val))
    return list(reversed(buckets)) if lower_is_better else buckets


# Redefine the values and number of buckets for clarity
min_val = -50
max_val = 50

# Generate the optimized buckets
buckets_list = get_buckets(min_val,  max_val)
print(buckets_list)

[(-50.0, -37.5), (-37.5, -25.0), (-25.0, -12.5), (-12.5, 0.0), (0.0, 12.5), (12.5, 25.0), (25.0, 37.5), (37.5, 50.0), (37.5, 50)]


In [144]:
max_val = features[model_metrics].max()
min_val = features[model_metrics].min()

In [155]:
for metric in model_metrics:
    buckets = get_buckets(min_val.loc[metric].round(0), max_val.loc[metric].round(0))
    print(metric, buckets)

oper_margin [(-58.0, -42.75), (-42.75, -27.5), (-27.5, -12.25), (-12.25, 3.0), (3.0, 18.25), (18.25, 33.5), (33.5, 48.75), (48.75, 64.0), (48.75, 64.0)]
tot_debt_to_tot_eqy [(4.0, 412.62), (412.62, 821.25), (821.25, 1229.88), (1229.88, 1638.5), (1638.5, 2047.12), (2047.12, 2455.75), (2455.75, 2864.38), (2864.38, 3273.0), (2864.38, 3273.0)]
tot_debt_to_ebitda [(0.0, 7.62), (7.62, 15.25), (15.25, 22.88), (22.88, 30.5), (30.5, 38.12), (38.12, 45.75), (45.75, 53.38), (53.38, 61.0), (53.38, 61.0)]
ebitda_to_tot_int_exp [(-1.0, 2.12), (2.12, 5.25), (5.25, 8.38), (8.38, 11.5), (11.5, 14.62), (14.62, 17.75), (17.75, 20.88), (20.88, 24.0), (20.88, 24.0)]
return_on_asset [(-24.0, -18.38), (-18.38, -12.75), (-12.75, -7.12), (-7.12, -1.5), (-1.5, 4.12), (4.12, 9.75), (9.75, 15.38), (15.38, 21.0), (15.38, 21.0)]
asset_turnover [(0.0, 0.5), (0.5, 1.0), (1.0, 1.5), (1.5, 2.0), (2.0, 2.5), (2.5, 3.0), (3.0, 3.5), (3.5, 4.0), (3.5, 4.0)]
