In [1]:
import numpy as np
import pandas as pd
from quantstats import extend_pandas
extend_pandas()

In [2]:
df = pd.read_csv("JALSH Index_dataset_2000_2024_clean.csv", index_col=0, header=[0, 1])
classfier = pd.read_excel("classification_data.xlsx", index_col=0)

In [3]:
print("Sector")
print(classfier.sector.unique())
print("Industry")
print(classfier.industry.unique())

Sector
['Financial' 'Consumer, Non-cyclical' 'Industrial' 'Basic Materials'
 'Diversified' 'Consumer, Cyclical' 'Communications' 'Technology' 'Energy']
Industry
['Banks' 'Commercial Services' 'Electronics' 'Miscellaneous Manufactur'
 'Diversified Finan Serv' 'Building Materials' 'Mining'
 'Investment Companies' 'Pharmaceuticals' 'Beverages' 'Agriculture'
 'REITS' 'Holding Companies-Divers' 'Distribution/Wholesale' 'Food'
 'Telecommunications' 'Computers' 'Retail' 'Insurance' 'Real Estate'
 'Coal' 'Transportation' 'Entertainment' 'Auto Parts&Equipment'
 'Iron/Steel' 'Software' 'Healthcare-Services' 'Private Equity'
 'Energy-Alternate Sources' 'Forest Products&Paper' 'Internet' 'Chemicals'
 'Engineering&Construction' 'Lodging']


In [4]:
profitability = ['ebitda_margin', 'oper_margin', 'return_on_asset']
liquidity = ['tot_debt_to_ebitda', 'tot_debt_to_tot_asset', 'tot_debt_to_tot_cap', 'tot_debt_to_tot_eqy', 'interest_coverage_ratio', 'ebitda_to_tot_int_exp']
efficiency = ['invent_to_sales', 'asset_turnover']
bloomberg_metrics = ['tot_debt_to_tot_eqy', 'interest_coverage_ratio', 'return_on_asset', 'tot_debt_to_ebitda', 'ebitda_to_tot_int_exp']

In [5]:
def filter_securities(filters, data=classfier):
    
    # Apply filters directly on the transposed
    filtered_data = data.query(
        ' and '.join(f'`{k}` == "{v}"' for k, v in filters.items())
    )
    
    return list(filtered_data.index)


def get_metrics(df):
    df = df.copy()
    individual_metrics = {}

    # List all metrics together for iteration
    all_metrics = profitability + liquidity + efficiency

    # Calculate the average of each metric individually for each stock
    for stock in df.columns.levels[0]:
        individual_metrics[stock] = {}
        for metric in all_metrics:
            if metric in df[stock].columns:
                individual_metrics[stock][metric] = df[stock][metric].mean()

    # Convert the results to a DataFrame for better presentation
    return pd.DataFrame(individual_metrics)

In [6]:
securities = filter_securities({"sector":"Communications", "industry": "Telecommunications"})

metrics = get_metrics(df)
sector_secuirties = metrics.loc[bloomberg_metrics, securities]

In [8]:
expert_thresholds = {
  "tot_debt_to_tot_eqy": {
    1: (0, 0.5),
    0: (0.5, 2),
    -1: (2, float("inf")),
  },
  "interest_coverage_ratio": {
    1: (8, float("inf")),
    0: (3, 8),
    -1: (0, 3),
  },
  "return_on_asset": {
    1: (0.2, float("inf")),
    0: (0.05, 0.2),
    -1: (0, 0.05),
  },
  "tot_debt_to_ebitda": {
    1: (0, 2),
    0: (2, 4),
    -1: (4, float("inf")),
  },
  "ebitda_to_tot_int_exp": {
    1: (8, float("inf")),
    0: (3, 8),
    -1: (0, 3),
  },
}


In [9]:
def create_industry_thresholds(df, metric_directions):
    industry_thresholds = {}

    for metric in df.index:
        q25 = df.loc[metric].quantile(0.25)
        q75 = df.loc[metric].quantile(0.75)
        
        mn = df.loc[metric].min()
        mx = df.loc[metric].max()
        
        if metric_directions[metric] == "higher_better":
            industry_thresholds[metric] = {
                1: (q75, np.inf),
                0: (q25, q75),
                -1: (-np.inf, q25)
            }
        elif metric_directions[metric] == "lower_better":
            industry_thresholds[metric] = {
                1: (-np.inf, q25),
                0: (q25, q75),
                -1: (q75, np.inf)
            }
        else:
            raise ValueError(f"Invalid direction specified for metric '{metric}'")

    return industry_thresholds

def classify_stock_metrics(df, thresholds):
    classified_df = df.copy()

    for metric, threshold in thresholds.items():
        if metric in df.index:
            for category, bounds in threshold.items():
                lower, upper = bounds
                classified_df.loc[metric] = classified_df.loc[metric].apply(
                    lambda x: category if lower <= float(x) < upper else classified_df.loc[metric, classified_df.columns[0]]
                )
        else:
            print(f"Metric '{metric}' not found in the DataFrame. Skipping classification for this metric.")

    return classified_df

def assess_creditworthiness(classified_metrics, weights):
    scores = {
        1: 3,
        0: 2,
        -1: 1
    }

    creditworthiness_scores = {}
    default_probabilities = {}

    for stock in classified_metrics.columns:
        stock_score = 0
        for metric, weight in weights.items():
            if metric in classified_metrics.index:
                category = classified_metrics.loc[metric, stock]
                stock_score += scores[category] * weight
            else:
                print(f"Metric '{metric}' not found for stock '{stock}'. Skipping this metric.")
        
        creditworthiness_scores[stock] = stock_score

        # Calculate the probability of default
        max_score = sum(weight * 3 for weight in weights.values())
        min_score = sum(weight * 1 for weight in weights.values())
        normalized_score = (stock_score - min_score) / (max_score - min_score)
        default_probability = 1 - normalized_score
        default_probabilities[stock] = default_probability

    return creditworthiness_scores

In [29]:
def classify_stock_metrics(df, thresholds, metric_directions):
    classified_df = df.copy()

    for metric, threshold in thresholds.items():
        if metric in df.index:
            for category, bounds in threshold.items():
                lower, upper = bounds
                # Determine if higher or lower values are better for the current metric
                direction = metric_directions.get(metric)

                # Apply classification logic based on the metric's direction
                if direction == "lower_better":
                    classified_df.loc[metric] = classified_df.loc[metric].apply(
                        lambda x: category if lower <= float(x) < upper else classified_df.loc[metric, classified_df.columns[0]]
                    )
                elif direction == "higher_better":
                    classified_df.loc[metric] = classified_df.loc[metric].apply(
                        lambda x: category if lower < float(x) <= upper else classified_df.loc[metric, classified_df.columns[0]]
                    )
        else:
            print(f"Metric '{metric}' not found in the DataFrame. Skipping classification for this metric.")

    return classified_df


In [30]:
classify_stock_metrics(sector_secuirties, industry_thresholds, metric_directions)

Unnamed: 0,BLU SJ Equity,MTN SJ Equity,TKG SJ Equity,VOD SJ Equity
tot_debt_to_tot_eqy,1.0,1.0,1.0,1.0
interest_coverage_ratio,-1.0,-1.0,-1.0,0.0
return_on_asset,-1.0,-1.0,-1.0,-1.0
tot_debt_to_ebitda,0.0,0.0,0.0,0.0
ebitda_to_tot_int_exp,-1.0,-1.0,-1.0,-1.0


In [10]:
metric_directions = {
    "tot_debt_to_tot_eqy": "lower_better",
    "interest_coverage_ratio": "higher_better",
    "return_on_asset": "higher_better",
    "tot_debt_to_ebitda": "lower_better",
    "ebitda_to_tot_int_exp": "higher_better"
}

weights = {
    "tot_debt_to_tot_eqy": 0.25,  
    "interest_coverage_ratio": 0.30,  
    "return_on_asset": 0.10,  
    "tot_debt_to_ebitda": 0.25,  
    "ebitda_to_tot_int_exp": 0.10 
}

industry_thresholds = create_industry_thresholds(sector_secuirties, metric_directions)
classified_metrics = classify_stock_metrics(sector_secuirties, industry_thresholds) # or expert_thresholds
creditworthiness_scores = assess_creditworthiness(classified_metrics, weights)

In [11]:
print("Creditworthiness Scores:")
print(creditworthiness_scores)

Creditworthiness Scores:
{'BLU SJ Equity': 1.7500000000000002, 'MTN SJ Equity': 1.7500000000000002, 'TKG SJ Equity': 1.7500000000000002, 'VOD SJ Equity': 2.0500000000000003}


In [13]:
classified_metrics

Unnamed: 0,BLU SJ Equity,MTN SJ Equity,TKG SJ Equity,VOD SJ Equity
tot_debt_to_tot_eqy,1.0,1.0,1.0,1.0
interest_coverage_ratio,-1.0,-1.0,-1.0,0.0
return_on_asset,-1.0,-1.0,-1.0,-1.0
tot_debt_to_ebitda,0.0,0.0,0.0,0.0
ebitda_to_tot_int_exp,-1.0,-1.0,-1.0,-1.0


In [21]:
sector_secuirties.loc["tot_debt_to_tot_eqy"]

BLU SJ Equity    32.809328
MTN SJ Equity    65.116785
TKG SJ Equity    56.593172
VOD SJ Equity    65.004940
Name: tot_debt_to_tot_eqy, dtype: float64

In [22]:
sector_secuirties.loc["tot_debt_to_tot_eqy"].quantile(.25)


50.647210600805735

In [12]:
sector_secuirties

Unnamed: 0,BLU SJ Equity,MTN SJ Equity,TKG SJ Equity,VOD SJ Equity
tot_debt_to_tot_eqy,32.809328,65.116785,56.593172,65.00494
interest_coverage_ratio,6.346575,8.547827,3.495262,9.790341
return_on_asset,4.21524,7.405181,6.40137,16.143841
tot_debt_to_ebitda,1.150158,1.305329,1.430977,0.793112
ebitda_to_tot_int_exp,7.547418,13.11413,8.796523,13.355185


In [43]:
def create_industry_thresholds_ratios(df, metric_directions):
    industry_thresholds = {}

    for metric in df.index:
        q25 = df.loc[metric].quantile(0.25)
        q75 = df.loc[metric].quantile(0.75)
        
        mn = df.loc[metric].min()
        mx = df.loc[metric].max()
        
        if metric_directions[metric] == "higher_better":
            industry_thresholds[metric] = q25, np.inf,
                
        elif metric_directions[metric] == "lower_better":
            industry_thresholds[metric] = -np.inf, q25
        else:
            raise ValueError(f"Invalid direction specified for metric '{metric}'")
    return industry_thresholds

In [45]:
rh = create_industry_thresholds_ratios(sector_secuirties, metric_directions)

In [46]:
def score_ratio(ratio, healthy_range, inverse=False):
    """
    Converts a financial ratio to a score based on its deviation from a healthy range.
    :param ratio: The financial ratio value.
    :param healthy_range: A tuple indicating the healthy range of values for the ratio.
    :param inverse: Set to True if lower values are better for the ratio.
    :return: A score as a float.
    """
    if inverse:
        if ratio < healthy_range[0]:
            return (healthy_range[0] - ratio) / healthy_range[0]
        elif ratio > healthy_range[1]:
            return -(ratio - healthy_range[1]) / healthy_range[1]
    else:
        if ratio > healthy_range[1]:
            return (ratio - healthy_range[1]) / healthy_range[1]
        elif ratio < healthy_range[0]:
            return -(healthy_range[0] - ratio) / healthy_range[0]
    return 0  # Ratio is within the healthy range

In [60]:
df = sector_secuirties.T.copy()

In [36]:
import joblib

In [38]:
joblib.dump(industry_thresholds, "metrics.joblib")

['metrics.joblib']

In [62]:
# Classification values and ranges
classification_values = {'Poor': 1, 'Fair': 2, 'Good': 3}

classification_ranges = {
    'Return_on_Assets': {'Poor': (None, 0.02), 'Fair': (0.02, 0.05), 'Good': (0.05, None)},
    'Operating_Margin_Ratio': {'Poor': (None, 0.10), 'Fair': (0.10, 0.20), 'Good': (0.20, None)},
    'Net_Profit_Margin': {'Poor': (None, 0.05), 'Fair': (0.05, 0.10), 'Good': (0.10, None)},
    'Debt_to_EBITDA': {'Good': (None, 2.0), 'Fair': (2.0, 4.0), 'Poor': (4.0, None)},  # Note: Inverted logic for Debt to EBITDA
    'EBITDA_to_Interest_Expense': {'Poor': (None, 2.0), 'Fair': (2.0, 4.0), 'Good': (4.0, None)},
    'Asset_Turnover': {'Poor': (None, 0.5), 'Fair': (0.5, 1.5), 'Good': (1.5, None)}
}

# Function to classify metric values into "Poor", "Fair", and "Good"
def classify_metric(value, metric):
    ranges = classification_ranges[metric]
    for classification, (low, high) in ranges.items():
        if low is None and value <= high:
            return classification
        elif high is None and value > low:
            return classification
        elif low is not None and high is not None and low <= value <= high:
            return classification
    return "Unknown"

# Apply classification for each metric
for metric in industry_thresholds.keys():
    df[f'{metric}_Class'] = df[metric].apply(lambda x: classify_metric(x, metric))

# # Function to calculate overall score and classification
# def calculate_and_classify_overall(row):
#     scores = [classification_values[row[f'{metric}_Class']] for metric in classification_ranges]
#     overall_score = sum(scores) / len(scores)
#     overall_class = 'Good' if overall_score > 2.5 else 'Fair' if overall_score > 1.5 else 'Poor'
#     return pd.Series([overall_score, overall_class], index=['Overall_Score', 'Overall_Class'])

# # Calculate overall scores and classifications
# df[['Overall_Score', 'Overall_Class']] = df.apply(calculate_and_classify_overall, axis=1)

KeyError: 'tot_debt_to_tot_eqy'

In [64]:
df["tot_debt_to_tot_eqy"]

BLU SJ Equity    32.809328
MTN SJ Equity    65.116785
TKG SJ Equity    56.593172
VOD SJ Equity    65.004940
Name: tot_debt_to_tot_eqy, dtype: float64