# Credit Risk

In [59]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import plotly.graph_objects as go
import statsmodels.api as sm

# df = pd.read_csv("../data/corp_bonds.csv")
df = pd.read_csv("https://www.dropbox.com/s/gozq6475ljbqjcz/corp_bonds.csv?dl=1")

# Clean-up data
df['ytm'] = df.YIELD.str.rstrip('%').astype('float') / 100.0
df[['ytm','YIELD']]



Columns (42) have mixed types. Specify dtype option on import or set low_memory=False.



Unnamed: 0,ytm,YIELD
0,0.029320,2.932%
1,0.047480,4.748%
2,0.054110,5.411%
3,0.033440,3.344%
4,0.047860,4.786%
...,...,...
111525,0.006693,.6693%
111526,0.007126,.7126%
111527,0.005649,.5649%
111528,0.006799,.6799%


In [60]:
df.columns

Index(['DATE', 'ISSUE_ID', 'CUSIP', 'bond_sym_id', 'bsym', 'company_symbol',
       'BOND_TYPE', 'CONV', 'PRINCIPAL_AMT', 'MATURITY', 'COUPON',
       'DAY_COUNT_BASIS', 'DATED_DATE', 'FIRST_INTEREST_DATE',
       'LAST_INTEREST_DATE', 'NCOUPS', 'AMOUNT_OUTSTANDING', 'R_SP', 'R_MR',
       'R_FR', 'N_SP', 'N_MR', 'N_FR', 'RATING_NUM', 'RATING_CAT',
       'RATING_CLASS', 'YIELD', 'PRICE_EOM', 'PRICE_LDM', 'PRICE_L5M',
       'COUPMONTH', 'nextcoup', 'COUPAMT', 'COUPACC', 'RET_EOM', 'RET_LDM',
       'RET_L5M', 'TMT', 'REMCOUPS', 'DURATION', 'DEFAULTED', 'DEFAULT_DATE',
       'DEFAULT_TYPE', 'ytm'],
      dtype='object')

In [61]:
# List of distinct tickers
df.company_symbol.unique()

array(['BNPQF', 'BNPQ', 'TRP', ..., 'AGREC', 'TME', 'PWR'], dtype=object)

In [62]:
# Number of distinct bonds
len(df.company_symbol.unique())

1183

In [29]:
# Number of distinct bonds
len(df.ISSUE_ID.unique())

27553

## Visualization of A Company's Yield Curve

In [31]:
# Subset the data
ticker = 'AAPL'
date = 20200930
df_subset = df[df.company_symbol==ticker]
df_subset = df_subset[df_subset.DATE==date]

In [32]:
# A second method to do subsetting all at once
df_subset = df[(df.company_symbol==ticker) & (df.DATE==date)]

In [33]:
# Plot
fig = go.Figure()
trace= go.Scatter(x=df_subset.TMT, y=df_subset.ytm, mode='markers')
fig.add_trace(trace)
fig.update_xaxes(title='Time-to-maturity (years)',tickformat=".1f",type="log")
fig.update_yaxes(title='Yield',tickformat=".2%")
fig.update_layout(title=ticker + ' Yield Curve as of ' + str(date))
fig.update_layout(legend=dict(yanchor="top", y =0.99, xanchor="left", x=0.01))
fig.show()

In [14]:
# A function to do the same thing
def yield_curve(ticker,date):
    d = df[(df.company_symbol==ticker) & (df.DATE==date)]
    fig = go.Figure()
    trace= go.Scatter(x=d.TMT, y=d.ytm, mode='markers')
    fig.add_trace(trace)
    fig.update_xaxes(title='Time-to-maturity (years)',tickformat=".1f",type="log")
    fig.update_yaxes(title='Yield',tickformat=".2%")
    fig.update_layout(title=ticker + ' Yield Curve as of ' + str(date))
    fig.update_layout(legend=dict(yanchor="top", y =0.99, xanchor="left", x=0.01))
    fig.show()
    return

In [34]:
df.DATE.unique()

array([20200131, 20200331, 20200430, 20200630, 20200731, 20200831,
       20200930, 20200229, 20200531], dtype=int64)

In [35]:
yield_curve('AAPL',20200229)
yield_curve('AAPL',20200930)

In [37]:
# Pre- and post-pandemic yield curve
ticker = 'IBM'
pre = 20200229
post= 20200930

fig = go.Figure()

# Pre-covid
d = df[(df.company_symbol==ticker) & (df.DATE==pre)]
trace= go.Scatter(x=d.TMT, y=d.ytm, mode='markers',name='Pre-COVID')
fig.add_trace(trace)

# Post-covid
d = df[(df.company_symbol==ticker) & (df.DATE==post)]
trace= go.Scatter(x=d.TMT, y=d.ytm, mode='markers',name='Post-COVID')
fig.add_trace(trace)

fig.update_xaxes(title='Time-to-maturity (years)',tickformat=".1f",type="log")
fig.update_yaxes(title='Yield',tickformat=".2%")
fig.update_layout(title=ticker + ' Yield Curves')
fig.update_layout(legend=dict(yanchor="top", y =0.99, xanchor="left", x=0.01))
fig.show()

## Yields as function of ratings and time-to-maturity

In [38]:
# Simple linear regression of yields on time-to-maturity and numeric ratings
df = df.reset_index().set_index(['company_symbol','ISSUE_ID','DATE'])
y = df['ytm']
X = sm.add_constant(df[['TMT','RATING_NUM']])
res = sm.OLS(y, X, missing='drop').fit()
print(res.summary())

                            OLS Regression Results                            
Dep. Variable:                    ytm   R-squared:                       0.240
Model:                            OLS   Adj. R-squared:                  0.240
Method:                 Least Squares   F-statistic:                 1.248e+04
Date:                Wed, 23 Nov 2022   Prob (F-statistic):               0.00
Time:                        09:28:43   Log-Likelihood:             1.1698e+05
No. Observations:               79152   AIC:                        -2.339e+05
Df Residuals:                   79149   BIC:                        -2.339e+05
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.0660      0.001    -98.984      0.0

In [39]:
# Ratings dummy variables
rating_dummies = pd.get_dummies(df.RATING_CAT)
df = df.merge(rating_dummies,left_index=True,right_index=True)

# Simple mean
df.groupby('RATING_CAT')['ytm'].aggregate(['mean','count'])

Unnamed: 0_level_0,mean,count
RATING_CAT,Unnamed: 1_level_1,Unnamed: 2_level_1
A,0.016983,28549
AA,0.015263,4257
AAA,0.017976,717
B,0.097776,2454
BB,0.059531,6464
BBB,0.029587,35733
C,0.426454,82
CC,0.579725,55
CCC,0.293988,753
D,0.672747,88


In [40]:
# List all of the ratings categories
ratings_list = rating_dummies.columns.to_list()
ratings_list

['A', 'AA', 'AAA', 'B', 'BB', 'BBB', 'C', 'CC', 'CCC', 'D', 'NR']

In [41]:
# Let's exclude non-rated bonds
df = df[df.RATING_CAT!='NR']

# And we will make AAA bonds the 'base' category
ratings_list.remove('AAA')
ratings_list.remove('NR')
ratings_list


['A', 'AA', 'B', 'BB', 'BBB', 'C', 'CC', 'CCC', 'D']

In [44]:
# Reorder based on quality
ratings_list = ['AA', 'A', 'BBB', 'BB', 'B', 'CCC', 'CC', 'C', 'D']

# Regression of yields on time-to-maturity and ratings dummy variables
y = df['ytm']
X = sm.add_constant(df[['TMT'] + ratings_list])
res = sm.OLS(y, X, missing='drop').fit()
print(res.summary())

                            OLS Regression Results                            
Dep. Variable:                    ytm   R-squared:                       0.456
Model:                            OLS   Adj. R-squared:                  0.456
Method:                 Least Squares   F-statistic:                     6636.
Date:                Wed, 23 Nov 2022   Prob (F-statistic):               0.00
Time:                        09:33:15   Log-Likelihood:             1.3024e+05
No. Observations:               79157   AIC:                        -2.605e+05
Df Residuals:                   79146   BIC:                        -2.604e+05
Df Model:                          10                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0056      0.002      3.208      0.0

In [54]:
d = res.params[ratings_list]
print(d)

AA     0.002262
A      0.003476
BBB    0.016712
BB     0.048901
B      0.088202
CCC    0.283808
CC     0.571147
C      0.418008
D      0.661586
dtype: float64


In [58]:
# Plot ratings effects (B and above)
d = d[['AA', 'A', 'BBB', 'BB', 'B']]
trace = go.Bar(x=d.index, y=d.values)
fig = go.Figure()
fig.add_trace(trace)
fig.update_xaxes(title='Rating',tickformat=".1f")
fig.update_yaxes(title='Average Yield Spread to AAA',tickformat=".2%")
fig.show()

### Exercise: How would we add separate effects for each date?