In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.spatial import distance


import warnings

# Suppress all warnings
warnings.filterwarnings("ignore")

# Data Analysis

In [5]:
df = pd.read_csv("JALSH Index_dataset_2000_2024_clean.csv", index_col=0, header=[0, 1])
classfier = pd.read_excel("classification_data.xlsx", index_col=0)
metrics = pd.read_excel("metrics_full.xlsx", index_col=0)

In [6]:
inverse_relationships = [False, True, False, False, True, False]

profitability = ['ebitda_margin', 'oper_margin', 'return_on_asset']
liquidity = ['tot_debt_to_ebitda', 'tot_debt_to_tot_asset', 'tot_debt_to_tot_cap', 'tot_debt_to_tot_eqy', 'interest_coverage_ratio', 'ebitda_to_tot_int_exp']
efficiency = ['invent_to_sales', 'asset_turnover']
bloomberg_metrics = ["oper_margin", 'tot_debt_to_tot_eqy', 'interest_coverage_ratio', 'return_on_asset', 'tot_debt_to_ebitda', 'ebitda_to_tot_int_exp']

In [132]:
def filter_securities(filters, data=classfier):
    
    # Apply filters directly on the transposed
    filtered_data = data.query(
        ' and '.join(f'`{k}` == "{v}"' for k, v in filters.items())
    )
    
    return list(filtered_data.index)


def calculate_overall_credit_score(credit_scores, weights = None):
    """
    Calculate the overall credit score and rating based on individual financial ratio credit scores.

    Args:
        credit_scores (list): List of credit scores for each financial ratio.

    Returns:
        tuple: A tuple containing the overall credit score (float) and overall credit rating (str).
    """
    
    max_score = 100
    min_score = 0
    
    if weights:
        overall_credit_score = sum(score * weight for score, weight in zip(credit_scores, weights)) / sum(weights)
    
    else:
        overall_credit_score = sum(credit_scores) / len(credit_scores)

    # Rescale the overall credit score to have a maximum rating of 100
    rescaled_credit_score = (overall_credit_score - min_score) / (max_score - min_score) * 100
    
    if overall_credit_score >= 75:
        overall_credit_rating = "Good"
    elif overall_credit_score >= 50:
        overall_credit_rating = "Fair"
    else:
        overall_credit_rating = "Poor"
    
    return overall_credit_score, overall_credit_rating

def calculate_credit_score(financial_ratio, industry_thresholds, expert_thresholds, inverse_relationship=False):
    """
    Calculate the actual credit score within a universal score range based on where the financial ratio
    falls within its thresholds, alongside the credit rating, confidence level, and universal range scores.
    
    Args:
        financial_ratio (float): Financial ratio value.
        industry_thresholds (dict): Dictionary of industry thresholds for each rating.
        expert_thresholds (dict): Dictionary of expert thresholds for each rating.
        inverse_relationship (bool): Indicates if the financial ratio has an inverse relationship (lower is better).

    Returns:
        tuple: A tuple containing the actual credit score (int), credit rating (str),
               confidence level (float), and universal score ranges (dict).
    """
    
    # Adjust thresholds and financial ratio for inverse relationships
    if inverse_relationship:
        financial_ratio = 1 / financial_ratio
        industry_thresholds = {k: 1/v for k, v in industry_thresholds.items()}
        expert_thresholds = {k: 1/v for k, v in expert_thresholds.items()}
    
    distances = {
        rating: distance.euclidean(
            [financial_ratio],
            [(industry_thresholds[rating] + expert_thresholds[rating]) / 2])
        for rating in ['good', 'fair', 'poor']
    }
    
    credit_rating = min(distances, key=distances.get)
    
    # Universal score ranges for each rating category
    score_ranges = {
        'good': (75, 100),
        'fair': (50, 74),
        'poor': (0, 49)
    }

    # Determine the min and max threshold for the financial ratio
    min_threshold = min(industry_thresholds.values())
    max_threshold = max(industry_thresholds.values())
    
    # Calculate where the financial ratio falls within its thresholds
    financial_ratio = max(min_threshold, min(financial_ratio, max_threshold))
    ratio_position = (financial_ratio - min_threshold) / (max_threshold - min_threshold)
    
    # Apply this proportion to the corresponding universal score range
    credit_score_range = score_ranges[credit_rating]
    score_range_width = credit_score_range[1] - credit_score_range[0]
    credit_score = credit_score_range[0] + ratio_position * score_range_width
    
    # Calculate confidence level
    confidence_level = (credit_score - credit_score_range[0]) / score_range_width
    confidence_level = max(0, min(1, confidence_level))
    
    return credit_score, credit_rating, confidence_level, score_ranges

def get_industry_thresholds(df, inverse_relationships, metrics=[0.25, 0.50, 0.75]):
    industry_thresholds = []

    for metric in df.index:
        q25, q50, q75 = df.loc[metric].quantile(metrics)
        
        if inverse_relationships.get(metric):
            thresholds = {"good": q25, "fair": q50, "poor": q75}
        else:
            thresholds = {"good": q75, "fair": q50, "poor": q25}
        
        industry_thresholds.append(thresholds)

    return industry_thresholds

def get_stock_metrics(df):
    dict_df = df.to_dict()
    return {stock: list(dict_df[stock].values()) for stock in dict_df}


# Score ranges
score_ranges = {
    'good': (75, 100),
    'fair': (50, 74),
    'poor': (0, 49)
}

# Function to assign categories
def assign_category(value):
    for category, (min_score, max_score) in score_ranges.items():
        if min_score <= value <= max_score:
            return category
    return 'unknown'

In [133]:
print(classfier.industry.unique())
print(classfier.sector.unique())

['Banks' 'Commercial Services' 'Electronics' 'Miscellaneous Manufactur'
 'Diversified Finan Serv' 'Building Materials' 'Mining'
 'Investment Companies' 'Pharmaceuticals' 'Beverages' 'Agriculture'
 'REITS' 'Holding Companies-Divers' 'Distribution/Wholesale' 'Food'
 'Telecommunications' 'Computers' 'Retail' 'Insurance' 'Real Estate'
 'Coal' 'Transportation' 'Entertainment' 'Auto Parts&Equipment'
 'Iron/Steel' 'Software' 'Healthcare-Services' 'Private Equity'
 'Energy-Alternate Sources' 'Forest Products&Paper' 'Internet' 'Chemicals'
 'Engineering&Construction' 'Lodging']
['Financial' 'Consumer, Non-cyclical' 'Industrial' 'Basic Materials'
 'Diversified' 'Consumer, Cyclical' 'Communications' 'Technology' 'Energy']


In [134]:
universe = {"sector":"Financial", "industry": "Banks"}


In [135]:
securities = filter_securities(universe)
df = metrics.loc[bloomberg_metrics, securities]

In [136]:
# company = {'STUDY_COMPANY': {'oper_margin': 4.000646844741706,
#                              'tot_debt_to_tot_eqy': 32.80932764816569,
#                              'interest_coverage_ratio': 6.346575419010469,
#                              'return_on_asset': 4.215239913853519,
#                              'tot_debt_to_ebitda': 1.150157826723298,
#                              'ebitda_to_tot_int_exp': 7.547418258086141}}



# company = pd.DataFrame(company)
# df = df.join(company)

In [137]:
df = df.dropna()

In [138]:
df

Unnamed: 0,ABG SJ Equity,FSR SJ Equity,INP SJ Equity,NED SJ Equity,SBK SJ Equity
oper_margin,28.202722,34.390613,20.55458,27.343137,32.856516
tot_debt_to_tot_eqy,242.937113,55.520414,279.286615,90.53576,130.337695
return_on_asset,1.233318,1.674277,0.788019,1.094465,1.158278


In [139]:
inverse_relationships = {
    'oper_margin': False,   
    'return_on_asset': False,   
    'tot_debt_to_tot_asset': True,  
    'tot_debt_to_tot_cap': True, 
    'tot_debt_to_tot_eqy': True, 
    'asset_turnover': False,
    'ebitda_margin': False ,
    'tot_debt_to_ebitda': True,
    'interest_coverage_ratio': False,
    'ebitda_to_tot_int_exp': False, 
    'invent_to_sales': True
}


inverse_relationships = {
    'oper_margin': False,   
    'return_on_asset': False,   
    'tot_debt_to_tot_asset': True,  
    'tot_debt_to_tot_cap': True, 
    'tot_debt_to_tot_eqy': True, 
    'asset_turnover': False,
    'ebitda_margin': False ,
    'tot_debt_to_ebitda': True,
    'interest_coverage_ratio': False,
    'ebitda_to_tot_int_exp': False, 
    'invent_to_sales': True
}

In [140]:
get_industry_thresholds(df, inverse_relationships)

[{'good': 32.85651586270942,
  'fair': 28.20272223399595,
  'poor': 27.34313663121127},
 {'good': 90.5357597700805,
  'fair': 130.3376947345469,
  'poor': 242.9371134519588},
 {'good': 1.233318242973886,
  'fair': 1.158278329553393,
  'poor': 1.094464585710489}]

In [141]:
weights_values = [.15, .20, .20, .15, .15, .15]

company_ratios = get_stock_metrics(df)
industry_thresholds = get_industry_thresholds(df, inverse_relationships)
expert_thresholds = get_industry_thresholds(df, inverse_relationships)

In [142]:
# Iterate over each company and calculate credit scores for each financial ratio
data  = {}

print(f"\t\t\t\t{universe}")
for company, ratios in company_ratios.items():
    
    
    credit_scores = []
    print(company)
    print("----" * 20)
    for i, ratio in enumerate(ratios):
        
        metric = bloomberg_metrics[i]
        
        credit_score, credit_rating, confidence_level, range_scores = calculate_credit_score(
            ratio, industry_thresholds[i], expert_thresholds[i], inverse_relationships.get(metric)
        )

        credit_scores.append(credit_score)
        
        
        print(f"Financial Ratio: {metric}: {ratio}")
        print(f"  Credit Score: {credit_score}")
        print(f"  Credit Rating: {credit_rating}")
        print(f"  Confidence Level: {confidence_level:.2%}")
        print(f"  Range Scores:")
        for rating, scores in range_scores.items():
            print(f"      {rating.capitalize()}: {scores[0]:.2f} - {scores[1]:.2f}")
        print()


    overall_credit_score, overall_credit_rating = calculate_overall_credit_score(credit_scores, weights_values)
    
    #if company == "STUDY_COMPANY":

    print(f"Overall Credit Score: {overall_credit_score:.2f}")
    print(f"Overall Credit Rating: {overall_credit_rating}")
    print()

    data[company] = credit_scores

				{'sector': 'Financial', 'industry': 'Banks'}
ABG SJ Equity
--------------------------------------------------------------------------------
Financial Ratio: oper_margin: 28.20272223399595
  Credit Score: 53.7418166972756
  Credit Rating: fair
  Confidence Level: 15.59%
  Range Scores:
      Good: 75.00 - 100.00
      Fair: 50.00 - 74.00
      Poor: 0.00 - 49.00

Financial Ratio: tot_debt_to_tot_eqy: 242.9371134519588
  Credit Score: 0.0
  Credit Rating: poor
  Confidence Level: 0.00%
  Range Scores:
      Good: 75.00 - 100.00
      Fair: 50.00 - 74.00
      Poor: 0.00 - 49.00

Financial Ratio: interest_coverage_ratio: 1.233318242973886
  Credit Score: 100.0
  Credit Rating: good
  Confidence Level: 100.00%
  Range Scores:
      Good: 75.00 - 100.00
      Fair: 50.00 - 74.00
      Poor: 0.00 - 49.00

[8.06127250459134, 0.0, 20.0]


TypeError: unsupported operand type(s) for -: 'list' and 'int'

In [143]:
df

Unnamed: 0,ABG SJ Equity,FSR SJ Equity,INP SJ Equity,NED SJ Equity,SBK SJ Equity
oper_margin,28.202722,34.390613,20.55458,27.343137,32.856516
tot_debt_to_tot_eqy,242.937113,55.520414,279.286615,90.53576,130.337695
return_on_asset,1.233318,1.674277,0.788019,1.094465,1.158278


In [145]:
sum([8.06127250459134, 0.0, 20.0]) 

28.06127250459134

In [45]:
# # Apply the function to the entire dataframe
# df_categories = pd.DataFrame(data, bloomberg_metrics).applymap(assign_category)
# df_categories