In [None]:
### install requirements
# Third-party packages imported by the next cell:
#   duckdb - used to open and query the project database file
#   arch   - provides `arch_model`, used for the GARCH fit in `zpp_default_prob`
# `-qq` keeps pip's output to a minimum.
!pip install duckdb -qq
!pip install arch -qq

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/985.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m286.7/985.3 kB[0m [31m8.4 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m985.3/985.3 kB[0m [31m14.3 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
# import requirements
import duckdb

import pandas as pd
import numpy as np
from datetime import date, datetime, timedelta, time

import scipy.stats as si
from scipy.optimize import fmin
from arch import arch_model

import contextlib
import io
import warnings

In [None]:
# Open the project database to work out which tickers can be analysed.
con = duckdb.connect("/content/drive/MyDrive/MFM project/bankruptcy_data.db")
try:
    # get price tables (the table names are matched against tickers below)
    tables = con.execute("SHOW TABLES").df()
    # get financial statements
    financial = con.execute("SELECT * FROM financial3").df()
    # cross-check availability: keep financial rows whose ticker also has a table
    available = financial[financial['ticker'].isin(tables['name'])]
    # sample read of a single table (kept from the original cell)
    test = con.execute("Select * from COST").df()
finally:
    # Always release the connection, even when one of the queries above fails,
    # so a failed cell does not leave the database file held open.
    con.close()

In [None]:
def d1(df, r, T):
    """Return the Merton d1 term for every row of ``df``.

    Parameters
    ----------
    df : DataFrame-like
        Must expose the columns ``'A'``, ``'total_debt'`` and ``'ann_vol'``.
    r : float
        Rate used to discount ``total_debt``.
    T : float
        Horizon over which debt is discounted and volatility is scaled.

    Returns
    -------
    The element-wise value of
    ``ln(A / (total_debt * exp(-r*T))) / (ann_vol*sqrt(T)) + 0.5 * ann_vol*sqrt(T)``.
    """
    # volatility scaled to the horizon
    vol_over_horizon = df['ann_vol'] * np.sqrt(T)
    # debt discounted back from the horizon
    discounted_debt = df['total_debt'] * np.exp(-r * T)
    log_ratio = np.log(df['A'] / discounted_debt)
    return log_ratio / vol_over_horizon + 0.5 * vol_over_horizon

def findImpliedVals(A, K, sigmaE_E, sigmaA, E, r, T):
    """Solve for the asset value and asset volatility implied by the Merton equations.

    A Nelder-Mead search (``scipy.optimize.fmin``) is started at ``[A, sigmaA]``
    and minimises the sum of two squared residuals:

    * ``asset*N(d1) - K*exp(-r*T)*N(d2) - E``
    * ``(N(d1)*vol*asset - sigmaE_E) / E``

    Parameters
    ----------
    A, sigmaA : float
        Starting guesses for asset value and asset volatility.
    K : float
        Debt level used in d1/d2.
    sigmaE_E : float
        Target for ``N(d1) * vol * asset``.
    E : float
        Target equity value; also divides the second residual.
    r, T : float
        Rate and horizon used in d1/d2.

    Returns
    -------
    numpy.ndarray
        Two elements: ``[implied asset value, implied asset volatility]``.
    """
    discounted_K = K * np.exp(-r * T)
    root_T = np.sqrt(T)

    def ImpliedMerton(c):
        asset_guess, vol_guess = c[0], c[1]
        scaled_vol = vol_guess * root_T
        d_plus = np.log(asset_guess / discounted_K) / scaled_vol + 0.5 * scaled_vol
        d_minus = d_plus - scaled_vol
        # residual of the equity-value equation
        equity_gap = (asset_guess * si.norm.cdf(d_plus)) - (discounted_K * si.norm.cdf(d_minus)) - E
        # residual of the volatility equation, expressed relative to E
        vol_gap = (si.norm.cdf(d_plus) * vol_guess * asset_guess - sigmaE_E) / E
        return equity_gap**2 + vol_gap**2

    # Keep the optimiser silent: anything written to stdout/stderr is discarded.
    muted_out, muted_err = io.StringIO(), io.StringIO()
    with contextlib.redirect_stdout(muted_out):
        with contextlib.redirect_stderr(muted_err):
            solution = fmin(ImpliedMerton, [A, sigmaA], disp=False)

    return solution

def kmv_default_prob(asset, dpt, mu_asset, sigma_asset, T):
    """Return the normal-CDF default probability from a distance-to-default.

    The distance is
    ``(ln(asset / dpt) + (mu_asset - 0.5*sigma_asset**2) * T) / (sigma_asset * sqrt(T))``
    and the result is ``N(-distance)``.

    Parameters
    ----------
    asset : float or array-like
        Asset value.
    dpt : float or array-like
        Default point the asset value is compared with.
    mu_asset : float or array-like
        Asset drift.
    sigma_asset : float or array-like
        Asset volatility.
    T : float
        Horizon.

    Returns
    -------
    float or numpy.ndarray
        ``scipy.stats.norm.cdf`` evaluated at the negated distance.
    """
    drift_term = (mu_asset - 0.5 * sigma_asset**2) * T
    distance = (np.log(asset / dpt) + drift_term) / (sigma_asset * np.sqrt(T))
    return si.norm.cdf(-distance)

def zpp_default_prob(sim_num, prices, horizon, seed=42):
    """Estimate the probability that a simulated price path falls below zero.

    An AR(3) mean model with GARCH(p=1, o=1, q=1) volatility is fitted to
    ``prices['price_diff']``. ``sim_num`` paths of ``horizon`` price changes are
    simulated from the end of the sample and accumulated on top of the last
    observed close. The result is the share of paths whose minimum is below zero.

    Changes from the previous implementation:

    * Paths now start from the LAST close in ``prices``. The forecast is taken
      from the final observation (``simulations.values[-1]``), so starting from
      the first close mixed a stale level with changes simulated from the end.
    * All ``horizon`` simulated changes are applied; previously the final one
      was dropped.
    * The path construction is vectorised with ``cumsum`` instead of a nested
      Python loop over every path and step.
    * A seeded generator is passed to ``forecast`` through ``rng``. The global
      ``np.random.seed`` call is kept only for backward compatibility.
      NOTE(review): arch draws simulation shocks from the distribution's own
      generator, so the global seed alone should not make runs repeatable --
      confirm against the installed arch version.

    Parameters
    ----------
    sim_num : int
        Number of simulated paths.
    prices : pandas.DataFrame
        Must contain ``'price_diff'`` (model input) and ``'Close'`` (level).
    horizon : int
        Number of simulated steps per path.
    seed : int, optional
        Seed for the simulation shocks (default 42, the previously hard-coded value).

    Returns
    -------
    float
        ``(number of paths with a minimum below 0) / sim_num``.
    """
    # Preserved side effect of the original implementation.
    np.random.seed(seed)
    # Standard-normal shock generator handed to arch; called as rng(size).
    # Assumes the model's default (normal) error distribution.
    shock_rng = np.random.RandomState(seed).standard_normal

    # Suppress GARCH model output and warnings
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        with contextlib.redirect_stdout(io.StringIO()), contextlib.redirect_stderr(io.StringIO()):

            ## fit AR3-threshold-Garch(1,1) model
            garch_model = arch_model(prices['price_diff'], mean='AR', lags=3, vol='GARCH', p=1, o=1, q=1)
            garch_model_fit = garch_model.fit()

            ## simulate paths
            forecasts = garch_model_fit.forecast(
                horizon=horizon, method='simulation', simulations=sim_num, rng=shock_rng
            )

    # Simulations from the last forecast origin, transposed to (horizon, sim_num).
    simulated_changes = forecasts.simulations.values[-1, :, :].T

    # Accumulate the changes on top of the most recent observed price.
    start_price = prices['Close'].iloc[-1]
    price_paths = np.cumsum(simulated_changes, axis=0)
    price_paths += start_price

    ## calculate default probability
    # A path counts as a default when it dips below zero at any step.
    default_count = np.count_nonzero(price_paths.min(axis=0) < 0)
    return default_count / sim_num  # (Number[price path < 0] / total number of simulation)

In [None]:
# con = duckdb.connect("/content/drive/MyDrive/MFM project/bankruptcy_data.db")
# con.execute("DROP TABLE IF EXISTS bankrupted_prob_results")
# con.close()

In [None]:
# ---- run configuration ----
num_sim = 50_000   # simulated price paths per ZPP estimate
horizon = 252      # simulation horizon; also the factor used to annualise daily statistics
r = 0.01           # rate used for discounting in the Merton formulas
T = 1              # horizon passed to d1/d2 and the KMV formula

db_path = "/content/drive/MyDrive/MFM project/bankruptcy_data.db"
lagged_cols = ['market_cap', 'total_debt', 'dpt', 'ann_vol', 'ann_return', 'cash_equivalent']
statement_cols = ['shares', 'short_term_debt', 'long_term_debt', 'cash_equivalent']

# dataframe to store each ticker's results
prob_results = pd.DataFrame()

# Hoisted out of the loop: only needed for the progress message.
n_tickers = available['ticker'].nunique()

for idx, ticker in enumerate(available['ticker'].unique()):
    start_time = datetime.now()

    # get base data for ticker
    con = duckdb.connect(db_path)
    try:
        # A table name cannot be bound as a parameter, so it is quoted as an
        # identifier instead; the WHERE values are bound as parameters.
        quoted_table = '"' + str(ticker).replace('"', '""') + '"'
        prices = con.execute(f"SELECT * FROM {quoted_table}").df()
        financial = con.execute("SELECT * FROM financial3 WHERE ticker = ?", [ticker]).df()
        bankrupt = con.execute("SELECT * FROM bankrupted WHERE Ticker = ?", [ticker]).df()
    finally:
        # Release the database even if a query fails.
        con.close()

    # quarterly placeholder (stays empty when the ticker is skipped)
    quarterly = pd.DataFrame()

    # check availability of at least a year of price data before bankruptcy date
    if (prices['Date'].dt.year.min() <= bankrupt.EndDate.dt.year.min() - 1):
        # filter until the year before bankrupt
        prices = prices[prices['Date'].dt.year <= bankrupt.EndDate.dt.year.min() - 1].copy()
        # calculate log return - for KMV
        prices['log_return'] = np.log(prices['Close'] / prices['Close'].shift(1)).fillna(0)
        # calculate price diff - for ZPP
        prices['price_diff'] = prices['Close'] - prices['Close'].shift(1)
        prices.fillna(0, inplace=True)

        # process by quarter, transforms daily price into quarterly
        prices['quarter_end'] = prices['Date'] + pd.tseries.offsets.QuarterEnd(0)
        quarter_end_price = prices.groupby('quarter_end', as_index=False).last()

        quarterly_vol = prices.groupby('quarter_end', as_index=False).agg({'log_return': 'std'})
        quarterly_vol.columns = ['quarter_end', 'std_log_return']
        quarterly_mean = prices.groupby('quarter_end', as_index=False).agg({'log_return': 'mean'})
        quarterly_mean.columns = ['quarter_end', 'mean_log_return']

        quarterly = quarterly_vol.merge(quarterly_mean, on='quarter_end', how='left')

        # merge with financials
        financial['quarter_end'] = pd.to_datetime(financial['date']) + pd.tseries.offsets.QuarterEnd(0)
        quarterly = quarterly.merge(financial, on='quarter_end', how='left').dropna()
        quarterly[statement_cols] = quarterly[statement_cols].astype(float)

        # get prices for calculating market cap
        quarterly = quarterly.merge(quarter_end_price[['quarter_end', 'Close']], on='quarter_end', how='left')

        # process financial statement values
        quarterly['market_cap'] = quarterly['Close'] * quarterly['shares']
        quarterly['total_debt'] = quarterly['short_term_debt'] + quarterly['long_term_debt']
        quarterly['dpt'] = quarterly['short_term_debt'] + 0.5 * quarterly['long_term_debt']
        # A standard deviation scales with the square root of the number of periods ...
        quarterly['ann_vol'] = quarterly['std_log_return'] * np.sqrt(horizon)
        # ... but a mean log return scales linearly, so it is multiplied by
        # `horizon` itself (the previous sqrt(horizon) understated the drift).
        quarterly['ann_return'] = quarterly['mean_log_return'] * horizon

        # drop unnecessary columns
        quarterly = quarterly[['quarter_end'] + lagged_cols].copy()

        # shift the values by 1 quarter to calculate for information set at that time point
        quarterly[lagged_cols] = quarterly[lagged_cols].shift(1)
        # shifted values will become NaN
        quarterly.dropna(inplace=True)

        # Let A = asset (assuming total valuation reflect in market_cap), K = total_debt
        quarterly['A'] = quarterly['market_cap'] + quarterly['total_debt'] - quarterly['cash_equivalent']

        # calculate d1, d2
        quarterly['d1'] = d1(quarterly, r, T)
        quarterly['d2'] = quarterly['d1'] - (quarterly['ann_vol'] * np.sqrt(T))

        # calculate Nd1, Nd2
        quarterly['Nd1'] = si.norm.cdf(quarterly['d1'])
        quarterly['Nd2'] = si.norm.cdf(quarterly['d2'])

        # calculate E and sigmaE * E from Merton
        quarterly['E'] = quarterly['A'] * quarterly['Nd1'] - quarterly['total_debt'] * np.exp(-r * T) * quarterly['Nd2']
        quarterly['sigmaE_E'] = quarterly['Nd1'] * quarterly['ann_vol'] * quarterly['A']

        # calculate implied Asset and Asset volatility
        # NOTE(review): E and sigmaE_E are produced from A and ann_vol just above,
        # and the solver is then started at (A, ann_vol), so it is expected to
        # return values close to its starting point -- confirm whether observed
        # market_cap / equity volatility were meant to be the targets instead.
        implied_Asset = []
        implied_Vol = []
        for A, K, sigmaE_E, sigmaV, E in zip(quarterly['A'], quarterly['total_debt'], quarterly['sigmaE_E'], quarterly['ann_vol'], quarterly['E']):
            # Solve once per row and unpack both results (the optimiser was
            # previously run twice with identical inputs).
            implied = findImpliedVals(A, K, sigmaE_E, sigmaV, E, r, T)
            implied_Asset.append(implied[0])
            implied_Vol.append(implied[1])

        quarterly['implied_Asset'] = implied_Asset
        quarterly['implied_Vol'] = implied_Vol

        # calculate KMV probability of default
        quarterly['kmv_prob_default'] = kmv_default_prob(quarterly['implied_Asset'], quarterly['dpt'], quarterly['ann_return'], quarterly['implied_Vol'], T)

        # calculate ZPP probability of default
        # NOTE(review): the KMV inputs above are lagged by one quarter, while the
        # ZPP fit uses the prices of the labelled quarter itself -- confirm this
        # difference is intended.
        zpp_prob_defaults = []
        for quarter in quarterly.quarter_end:
            quarter_prices = prices[prices['quarter_end'] == quarter].copy()
            if (quarter_prices.shape[0] > 0):
                zpp_prob_defaults.append(zpp_default_prob(num_sim, quarter_prices, horizon))
            else:
                zpp_prob_defaults.append(np.nan)
        quarterly['zpp_prob_default'] = zpp_prob_defaults

        quarterly['ticker'] = ticker

        if (quarterly.shape[0] > 0):
            quarterly = quarterly[['ticker', 'quarter_end', 'kmv_prob_default', 'zpp_prob_default']]
            con = duckdb.connect(db_path)
            try:
                con.execute("CREATE TABLE IF NOT EXISTS bankrupted_prob_results (ticker TEXT, quarter_end DATE, kmv_prob_default FLOAT, zpp_prob_default FLOAT)")
                # `quarterly` is resolved by DuckDB from the Python variable of that name.
                con.execute("INSERT INTO bankrupted_prob_results SELECT * FROM quarterly")
            finally:
                con.close()

    # 'Success' is reported whenever rows were produced, i.e. the same
    # condition (> 0) that triggers the INSERT above.
    print(f"[{idx + 1}/{n_tickers}] - Processed: {ticker}, Result: {quarterly.shape}, Status: {'Success' * (quarterly.shape[0] > 0)}, Time: {datetime.now() - start_time}")

[1/60] - Processed: IMUC, Result: (0, 0), Status: , Time: 0:00:00.083359
[2/60] - Processed: RNVA, Result: (13, 4), Status: Success, Time: 0:04:00.640804
[3/60] - Processed: AERG, Result: (0, 0), Status: , Time: 0:00:00.115034
[4/60] - Processed: GXXM, Result: (7, 4), Status: Success, Time: 0:02:04.880774
[5/60] - Processed: YUMAQ, Result: (11, 4), Status: Success, Time: 0:03:17.301987


  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)


[6/60] - Processed: GLAE, Result: (25, 4), Status: Success, Time: 0:07:22.588932
[7/60] - Processed: BMTM, Result: (21, 4), Status: Success, Time: 0:06:21.687336
[8/60] - Processed: WAYS, Result: (21, 4), Status: Success, Time: 0:06:12.781360
[9/60] - Processed: SYNE, Result: (23, 4), Status: Success, Time: 0:06:52.547683


  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)


[10/60] - Processed: KGJI, Result: (21, 4), Status: Success, Time: 0:06:14.854348
[11/60] - Processed: ADYX, Result: (19, 4), Status: Success, Time: 0:05:37.573361
[12/60] - Processed: GAHC, Result: (27, 4), Status: Success, Time: 0:08:04.671455
[13/60] - Processed: CBKCQ, Result: (21, 4), Status: Success, Time: 0:06:10.576441
[14/60] - Processed: NMGX, Result: (21, 4), Status: Success, Time: 0:06:18.481182
[15/60] - Processed: GENN, Result: (21, 4), Status: Success, Time: 0:06:19.928327


  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)


[16/60] - Processed: RBCN, Result: (25, 4), Status: Success, Time: 0:07:28.079331


  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)


[17/60] - Processed: SCPS, Result: (5, 4), Status: Success, Time: 0:01:30.057800
[18/60] - Processed: IWSY, Result: (26, 4), Status: Success, Time: 0:07:52.690060
[19/60] - Processed: ACUR, Result: (25, 4), Status: Success, Time: 0:07:27.218986


  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)


[20/60] - Processed: WSTL, Result: (24, 4), Status: Success, Time: 0:07:10.358924
[21/60] - Processed: WINR, Result: (16, 4), Status: Success, Time: 0:04:48.750048
[22/60] - Processed: ATRX, Result: (27, 4), Status: Success, Time: 0:08:02.027210


  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)


[23/60] - Processed: TCCO, Result: (28, 4), Status: Success, Time: 0:08:33.768967


  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)


[24/60] - Processed: ANDR, Result: (25, 4), Status: Success, Time: 0:07:29.611755
[25/60] - Processed: ABMC, Result: (25, 4), Status: Success, Time: 0:07:32.442078
[26/60] - Processed: SBSAA, Result: (18, 4), Status: Success, Time: 0:05:26.035758
[27/60] - Processed: NAUH, Result: (17, 4), Status: Success, Time: 0:05:02.582492


  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)


[28/60] - Processed: BDRL, Result: (25, 4), Status: Success, Time: 0:07:26.132680
[29/60] - Processed: SIMPQ, Result: (24, 4), Status: Success, Time: 0:07:12.111927
[30/60] - Processed: RENO, Result: (25, 4), Status: Success, Time: 0:07:15.438921
[31/60] - Processed: AFIIQ, Result: (23, 4), Status: Success, Time: 0:06:53.509663
[32/60] - Processed: KLDO, Result: (11, 4), Status: Success, Time: 0:03:20.285159
[33/60] - Processed: EVLO, Result: (19, 4), Status: Success, Time: 0:05:42.796770


  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)


[34/60] - Processed: LADX, Result: (25, 4), Status: Success, Time: 0:07:22.595279


  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)


[35/60] - Processed: YAYO, Result: (12, 4), Status: Success, Time: 0:03:32.878234
[36/60] - Processed: IMCI, Result: (24, 4), Status: Success, Time: 0:07:07.620633
[37/60] - Processed: RGRX, Result: (24, 4), Status: Success, Time: 0:07:10.315546


  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)


[38/60] - Processed: TKOI, Result: (26, 4), Status: Success, Time: 0:07:38.104436
[39/60] - Processed: USRM, Result: (27, 4), Status: Success, Time: 0:07:44.188549
[40/60] - Processed: FALC, Result: (30, 4), Status: Success, Time: 0:08:55.868392


  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)


[41/60] - Processed: CALA, Result: (26, 4), Status: Success, Time: 0:07:31.080162
[42/60] - Processed: AMTY, Result: (24, 4), Status: Success, Time: 0:06:53.876947
[43/60] - Processed: MGTI, Result: (26, 4), Status: Success, Time: 0:07:45.702077
[44/60] - Processed: MTEM, Result: (24, 4), Status: Success, Time: 0:07:05.308791
[45/60] - Processed: BSFC, Result: (14, 4), Status: Success, Time: 0:04:09.206780
[46/60] - Processed: PFTA, Result: (9, 4), Status: Success, Time: 0:02:38.624490


  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)


[47/60] - Processed: GHSI, Result: (18, 4), Status: Success, Time: 0:05:22.151301
[48/60] - Processed: SMFL, Result: (7, 4), Status: Success, Time: 0:02:03.263087
[49/60] - Processed: BGXX, Result: (7, 4), Status: Success, Time: 0:02:05.007334
[50/60] - Processed: BIGGQ, Result: (24, 4), Status: Success, Time: 0:06:59.700437


  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)


[51/60] - Processed: CLOE, Result: (9, 4), Status: Success, Time: 0:02:44.027657
[52/60] - Processed: EFTR, Result: (9, 4), Status: Success, Time: 0:02:36.126003
[53/60] - Processed: NOVA, Result: (22, 4), Status: Success, Time: 0:06:35.971732
[54/60] - Processed: WGHTQ, Result: (28, 4), Status: Success, Time: 0:08:22.926487
[55/60] - Processed: AXDXQ, Result: (29, 4), Status: Success, Time: 0:08:38.075942
[56/60] - Processed: VINC, Result: (19, 4), Status: Success, Time: 0:05:42.000648
[57/60] - Processed: SCPX, Result: (27, 4), Status: Success, Time: 0:07:46.792449
[58/60] - Processed: AIEV, Result: (10, 4), Status: Success, Time: 0:02:53.248921


  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)


[59/60] - Processed: VRPX, Result: (17, 4), Status: Success, Time: 0:05:06.420043
[60/60] - Processed: MEHCQ, Result: (17, 4), Status: Success, Time: 0:04:57.511484
