# Set-up

In [None]:
#Imports
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import os
#can use %matplotlib inline if you don't want it to be interactive
%matplotlib notebook

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
#point this at the folder that holds the bond index csv files (change it to your intended directory)
data_dir="/kaggle/input/corporate-bonds-indices"
#later cells read the csv files by bare file name, so the folder is made the working directory
os.chdir(data_dir)

# Creating a combined masterlist
### This section can be skipped: the correlation matrices below load the pre-combined CSV that ships with the dataset

In [None]:
#get list of datasets for iteration
#only the top level of the dataset folder is listed; an os.walk loop would overwrite the list
#with the files of whichever sub-folder it visits last, and leave it undefined if nothing is found
data_dir="/kaggle/input/corporate-bonds-indices"
xlist=sorted(
    name for name in os.listdir(data_dir)
    if os.path.isfile(os.path.join(data_dir,name))
)
#to check
print(xlist)


In [None]:
#merge all the csvs, using the us general bonds total return index dates as the base date column
masterdf=pd.read_csv("US_Gen_TR.csv",usecols=["DATE"])

#the pre-combined sheet is skipped by name instead of by its position in the sorted list,
#so the loop keeps working if other files are added to the folder
combined_sheet="Combined Bond Indices  Yields.csv"

for item in xlist:
    #ignore the combined sheet and anything that is not a csv
    if item==combined_sheet or not item.lower().endswith(".csv"):
        continue
    #read each csv and name the value column after the file (without ".csv")
    #assumes every index file has exactly two columns: the date and the index value
    series_name=os.path.splitext(item)[0]
    df=pd.read_csv(item,header=0,names=["DATE",series_name])
    #inner join to keep only the dates that appear in all indices
    masterdf=pd.merge(masterdf,df,on=["DATE"],how="inner")
masterdf.head()

In [None]:
#write the combined spreadsheet to csv
#if you want only specific columns of the dataframe, use masterdf[["col1","col2","col3"...]].to_csv
#an absolute output path is used so the working directory is left untouched;
#changing it here would break a re-run of the cells above, which read the input files by bare name
output_path=os.path.join("/kaggle/working","sample_file.csv")
#index=False keeps the row index out of the file, otherwise it comes back as an "Unnamed: 0" column when the csv is re-read
masterdf.to_csv(output_path,index=False)

# Correlation Matrices

In [None]:
#make sure the dataset folder is the working directory, so the csv in the next cell can be read by bare file name
#(needed when the previous section was skipped, or when it left the kernel in another directory)
os.chdir("/kaggle/input/corporate-bonds-indices")

In [None]:
#load the pre-combined sheet and discard the "Unnamed: 0" column
#(presumably the row index that was saved along with the data)
masterdf=pd.read_csv("Combined Bond Indices  Yields.csv").drop(columns=["Unnamed: 0"])
masterdf.head()

In [None]:
#Just for fun!
#DataFrame.kurt() uses Fisher's definition (excess kurtosis), so a normal distribution scores 0
#positive kurtosis means heavier tails (more extreme values) than a normal distribution
#negative kurtosis means lighter tails (fewer extreme values) than a normal distribution
#numeric_only=True leaves out the DATE column, which is read in as text and would make kurt() raise a TypeError on pandas 2.x
masterdf.kurt(numeric_only=True)

# Total Return (TR) correlation matrix

## Simplified version (only US AAA-CCC)

In [None]:
#create baseplot
fig, ax=plt.subplots(figsize=(9,8))

#list of columns to use for the matrix, too cluttered to type out in the params
collist=["US_AAA_TR","US_AA_TR","US_A_TR","US_BBB_TR","US_BB_TR","US_B_TR","US_CCC_TR"]

#compute the correlation matrix once and reuse it for the mask and the heatmap
corr=masterdf[collist].corr()

#for triangle correlation matrix: hide the upper triangle and the diagonal
#the mask is built from the shape of the matrix, not from its values,
#so a correlation of exactly 0 in the upper triangle is hidden as well
mask=np.triu(np.ones_like(corr,dtype=bool))
sns.heatmap(corr,annot=True,annot_kws={"size": 10},square=True,cmap="coolwarm",fmt=".3f",mask=mask,ax=ax)
ax.set_title("Corporate Bonds Total Return (TR) Correlation Matrix")

#final formatting
plt.setp(ax.get_xticklabels(),rotation=60,horizontalalignment="right")
fig.tight_layout()

## Full version

In [None]:
#create baseplot
fig, ax=plt.subplots(figsize=(10,9))

#list of columns to use for the matrix, too cluttered to type out in the params
collist=["US_AAA_TR","US_AA_TR","US_A_TR","US_BBB_TR","US_BB_TR","US_B_TR","US_CCC_TR","EMEA_Gen_TR","EM_Gen_TR","US_Gen_TR"]

#compute the correlation matrix once and reuse it for the mask and the heatmap
corr=masterdf[collist].corr()

#for triangle correlation matrix: hide the upper triangle and the diagonal
#the mask is built from the shape of the matrix, not from its values,
#so a correlation of exactly 0 in the upper triangle is hidden as well
mask=np.triu(np.ones_like(corr,dtype=bool))
sns.heatmap(corr,annot=True,annot_kws={"size": 10},square=True,cmap="coolwarm",fmt=".3f",mask=mask,ax=ax)
ax.set_title("Corporate Bonds Total Return (TR) Correlation Matrix")

#annotation
#each heatmap cell is one unit wide and tall, counted from the top-left corner in collist order,
#so a rectangle is given as ((first column, row), number of columns, number of rows)
highlighted_cells=[
    ((2,7),2,1), #US_A_TR and US_BBB_TR against EMEA_Gen_TR
    ((3,8),1,1), #US_BBB_TR against EM_Gen_TR
    ((2,9),2,1), #US_A_TR and US_BBB_TR against US_Gen_TR
]
for corner,width,height in highlighted_cells:
    ax.add_patch(plt.Rectangle(corner,width,height,fill=False,lw=2))
#dashed guides after the 7 US rating columns/rows, separating them from the general indices
#xmax/ymax are fractions of the axes: 7 of 10 columns across, and the bottom 3 of 10 rows
ax.axhline(y=7,xmin=0,xmax=0.7,ls="--",c="purple")
ax.axvline(x=7,ymin=0,ymax=0.3,ls="--",c="purple")

#final formatting
plt.setp(ax.get_xticklabels(),rotation=60,horizontalalignment="right")
fig.tight_layout()

# Bond yield correlation matrix

In [None]:
#create baseplot
fig, ax=plt.subplots(figsize=(7,6))

#too cluttered to type out in the params
collist=["US_AAA_Y","US_AA_Y","US_A_Y","US_BBB_Y","US_BB_Y","US_B_Y","US_CCC_Y","US_Gen_Y","US_High_Y","EM_Good_Y","EM_High_Y"]

#compute the correlation matrix once and reuse it for the mask and the heatmap
corr=masterdf[collist].corr()

#for triangle correlation matrix: hide the upper triangle and the diagonal
#the mask is built from the shape of the matrix, not from its values,
#so a correlation of exactly 0 in the upper triangle is hidden as well
mask=np.triu(np.ones_like(corr,dtype=bool))
sns.heatmap(corr,annot=True,annot_kws={"size": 10},square=True,cmap="coolwarm",mask=mask,ax=ax)
ax.set_title("Bond Yield Correlation Matrix")

#annotations
#each heatmap cell is one unit wide and tall, counted from the top-left corner in collist order,
#so a rectangle is given as ((first column, row), number of columns, number of rows)
highlighted_cells=[
    ((1,7),3,1),  #US_AA_Y, US_A_Y and US_BBB_Y against US_Gen_Y
    ((4,8),2,1),  #US_BB_Y and US_B_Y against US_High_Y
    ((2,9),2,1),  #US_A_Y and US_BBB_Y against EM_Good_Y
    ((4,10),3,1), #US_BB_Y, US_B_Y and US_CCC_Y against EM_High_Y
]
for corner,width,height in highlighted_cells:
    ax.add_patch(plt.Rectangle(corner,width,height,fill=False,lw=2))
#dashed guides after the 7 US rating columns/rows, separating them from the aggregate indices
#xmax/ymax are fractions of the axes: roughly 7 of 11 columns across, and the bottom 4 of 11 rows
ax.axhline(y=7,xmin=0,xmax=0.64,ls="--",c="purple")
ax.axvline(x=7,ymin=0,ymax=0.36,ls="--",c="purple")

#final formatting
plt.setp(ax.get_xticklabels(),rotation=60,horizontalalignment="right")
fig.tight_layout()

In [None]:
#Compare the correlations to the actual yields and see which annotated relationships are reflected in the matrix
#values are randomly sampled using .sample()
#random_state fixes the draw so the same 10 rows appear on every re-run; change or remove it to see other rows
yield_cols=["US_AAA_Y","US_AA_Y","US_A_Y","US_BBB_Y","US_BB_Y","US_B_Y","US_CCC_Y","US_Gen_Y","US_High_Y","EM_Good_Y","EM_High_Y"]
masterdf[["DATE"]+yield_cols].sample(10,random_state=42)

Hope this was helpful! Please upvote and comment if you have any suggestions :)